From e2e7b0a2cd32cb44b1806a6f0961599aef9cd0e9 Mon Sep 17 00:00:00 2001 From: Igor K Date: Fri, 24 May 2019 01:42:03 +0300 Subject: [PATCH 001/325] Rename reservation tests from *.sh to *.ksh Reviewed-by: Richard Elling Reviewed-by: Brian Behlendorf Signed-off-by: Igor Kozhukhov Closes #8729 --- .../tests/functional/reservation/Makefile.am | 44 +++++++++---------- ...ion_001_pos.sh => reservation_001_pos.ksh} | 0 ...ion_002_pos.sh => reservation_002_pos.ksh} | 0 ...ion_003_pos.sh => reservation_003_pos.ksh} | 0 ...ion_004_pos.sh => reservation_004_pos.ksh} | 0 ...ion_005_pos.sh => reservation_005_pos.ksh} | 0 ...ion_006_pos.sh => reservation_006_pos.ksh} | 0 ...ion_007_pos.sh => reservation_007_pos.ksh} | 0 ...ion_008_pos.sh => reservation_008_pos.ksh} | 0 ...ion_009_pos.sh => reservation_009_pos.ksh} | 0 ...ion_010_pos.sh => reservation_010_pos.ksh} | 0 ...ion_011_pos.sh => reservation_011_pos.ksh} | 0 ...ion_012_pos.sh => reservation_012_pos.ksh} | 0 ...ion_013_pos.sh => reservation_013_pos.ksh} | 0 ...ion_014_pos.sh => reservation_014_pos.ksh} | 0 ...ion_015_pos.sh => reservation_015_pos.ksh} | 0 ...ion_016_pos.sh => reservation_016_pos.ksh} | 0 ...ion_017_pos.sh => reservation_017_pos.ksh} | 0 ...ion_018_pos.sh => reservation_018_pos.ksh} | 0 ...ion_019_pos.sh => reservation_019_pos.ksh} | 0 ...ion_020_pos.sh => reservation_020_pos.ksh} | 0 ...ion_021_neg.sh => reservation_021_neg.ksh} | 0 ...ion_022_pos.sh => reservation_022_pos.ksh} | 0 23 files changed, 22 insertions(+), 22 deletions(-) rename tests/zfs-tests/tests/functional/reservation/{reservation_001_pos.sh => reservation_001_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_002_pos.sh => reservation_002_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_003_pos.sh => reservation_003_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_004_pos.sh => reservation_004_pos.ksh} (100%) rename 
tests/zfs-tests/tests/functional/reservation/{reservation_005_pos.sh => reservation_005_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_006_pos.sh => reservation_006_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_007_pos.sh => reservation_007_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_008_pos.sh => reservation_008_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_009_pos.sh => reservation_009_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_010_pos.sh => reservation_010_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_011_pos.sh => reservation_011_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_012_pos.sh => reservation_012_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_013_pos.sh => reservation_013_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_014_pos.sh => reservation_014_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_015_pos.sh => reservation_015_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_016_pos.sh => reservation_016_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_017_pos.sh => reservation_017_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_018_pos.sh => reservation_018_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_019_pos.sh => reservation_019_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_020_pos.sh => reservation_020_pos.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_021_neg.sh => reservation_021_neg.ksh} (100%) rename tests/zfs-tests/tests/functional/reservation/{reservation_022_pos.sh => reservation_022_pos.ksh} (100%) diff 
--git a/tests/zfs-tests/tests/functional/reservation/Makefile.am b/tests/zfs-tests/tests/functional/reservation/Makefile.am index 9b02867b972..8eaf0986155 100644 --- a/tests/zfs-tests/tests/functional/reservation/Makefile.am +++ b/tests/zfs-tests/tests/functional/reservation/Makefile.am @@ -2,28 +2,28 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/reservation dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ - reservation_001_pos.sh \ - reservation_002_pos.sh \ - reservation_003_pos.sh \ - reservation_004_pos.sh \ - reservation_005_pos.sh \ - reservation_006_pos.sh \ - reservation_007_pos.sh \ - reservation_008_pos.sh \ - reservation_009_pos.sh \ - reservation_010_pos.sh \ - reservation_011_pos.sh \ - reservation_012_pos.sh \ - reservation_013_pos.sh \ - reservation_014_pos.sh \ - reservation_015_pos.sh \ - reservation_016_pos.sh \ - reservation_017_pos.sh \ - reservation_018_pos.sh \ - reservation_019_pos.sh \ - reservation_020_pos.sh \ - reservation_021_neg.sh \ - reservation_022_pos.sh + reservation_001_pos.ksh \ + reservation_002_pos.ksh \ + reservation_003_pos.ksh \ + reservation_004_pos.ksh \ + reservation_005_pos.ksh \ + reservation_006_pos.ksh \ + reservation_007_pos.ksh \ + reservation_008_pos.ksh \ + reservation_009_pos.ksh \ + reservation_010_pos.ksh \ + reservation_011_pos.ksh \ + reservation_012_pos.ksh \ + reservation_013_pos.ksh \ + reservation_014_pos.ksh \ + reservation_015_pos.ksh \ + reservation_016_pos.ksh \ + reservation_017_pos.ksh \ + reservation_018_pos.ksh \ + reservation_019_pos.ksh \ + reservation_020_pos.ksh \ + reservation_021_neg.ksh \ + reservation_022_pos.ksh dist_pkgdata_DATA = \ reservation.cfg \ diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_001_pos.sh rename to 
tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_002_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_002_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_002_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_002_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_003_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_003_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_003_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_004_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_004_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_004_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_004_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_005_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_005_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_005_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_005_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_006_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_006_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_006_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_006_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_007_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_007_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_007_pos.sh rename to 
tests/zfs-tests/tests/functional/reservation/reservation_007_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_008_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_009_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_009_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_009_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_009_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_010_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_010_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_010_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_010_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_011_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_011_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_011_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_011_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_012_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_012_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_012_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_012_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_013_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_013_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_013_pos.sh rename to 
tests/zfs-tests/tests/functional/reservation/reservation_013_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_014_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_014_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_014_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_014_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_015_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_015_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_015_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_015_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_016_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_016_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_016_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_016_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_017_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_017_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_017_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_017_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_018_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_018_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_018_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_018_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_019_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_019_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_019_pos.sh rename to 
tests/zfs-tests/tests/functional/reservation/reservation_019_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_020_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_020_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_020_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_020_pos.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_021_neg.sh b/tests/zfs-tests/tests/functional/reservation/reservation_021_neg.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_021_neg.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_021_neg.ksh diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_022_pos.sh b/tests/zfs-tests/tests/functional/reservation/reservation_022_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/reservation/reservation_022_pos.sh rename to tests/zfs-tests/tests/functional/reservation/reservation_022_pos.ksh From cc434dcf451ef9145dd7777ffe2c2a3ae3a276e4 Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 04:17:00 +0200 Subject: [PATCH 002/325] Fix coverity defects: CID 186143 CID 186143: Memory - illegal accesses (USE_AFTER_FREE) This patch fixes an use-after-free in spa_import_progress_destroy() moving the kmem_free() call at the end of the function. 
Reviewed-by: Chris Dunlop Reviewed-by: Giuseppe Di Natale Reviewed-by: Igor Kozhukhov Reviewed-by: George Melikov Reviewed by: Brian Behlendorf Signed-off-by: loli10K Closes #8788 --- module/zfs/spa_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index dddbe9cfa0e..a111a9e4e61 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2102,8 +2102,8 @@ spa_import_progress_destroy(void) spa_history_list_t *shl = spa_import_progress_list; procfs_list_uninstall(&shl->procfs_list); spa_import_progress_truncate(shl, 0); - kmem_free(shl, sizeof (spa_history_list_t)); procfs_list_destroy(&shl->procfs_list); + kmem_free(shl, sizeof (spa_history_list_t)); } int From abe267f6775e2690e825c1020652e90a8779f18b Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 18:40:46 +0200 Subject: [PATCH 003/325] zpool: trim -p is not a valid option This commit removes the documented but not handled "-p" option from zpool(8) help message. Reviewed-by: George Melikov Reviewed-by: Chris Dunlop Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #8781 --- cmd/zpool/zpool_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 1dac4b167f7..f490675cd60 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -30,6 +30,7 @@ * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2017, Intel Corporation. 
+ * Copyright (c) 2019, loli10K */ #include @@ -384,7 +385,7 @@ get_usage(zpool_help_t idx) case HELP_RESILVER: return (gettext("\tresilver ...\n")); case HELP_TRIM: - return (gettext("\ttrim [-dp] [-r ] [-c | -s] " + return (gettext("\ttrim [-d] [-r ] [-c | -s] " "[ ...]\n")); case HELP_STATUS: return (gettext("\tstatus [-c [script1,script2,...]] " From f91e7e6284b33c4fcfa9e63359e7edb896da476c Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 21:17:52 +0200 Subject: [PATCH 004/325] Device removal panics on 32-bit systems The issue is caused by an incorrect usage of the sizeof() operator in vdev_obsolete_sm_object(): on 64-bit systems this is not an issue since both "uint64_t" and "uint64_t*" are 8 bytes in size. However on 32-bit systems pointers are 4 bytes long which is not supported by zap_lookup_impl(). Trying to remove a top-level vdev on a 32-bit system will cause the following failure: VERIFY3(0 == vdev_obsolete_sm_object(vd, &obsolete_sm_object)) failed (0 == 22) PANIC at vdev_indirect.c:833:vdev_indirect_sync_obsolete() Showing stack for process 1315 CPU: 6 PID: 1315 Comm: txg_sync Tainted: P OE 4.4.69+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 c1abc6e7 0ae10898 00000286 d4ac3bc0 c14397bc da4cd7d8 d4ac3bf0 d4ac3bd0 d790e7ce d7911cc1 00000523 d4ac3d00 d790e7d7 d7911ce4 da4cd7d8 00000341 da4ce664 da4cd8c0 da33fa6e 49524556 28335946 3d3d2030 65647620 626f5f76 Call Trace: [<>] dump_stack+0x58/0x7c [<>] spl_dumpstack+0x23/0x27 [spl] [<>] spl_panic.cold.0+0x5/0x41 [spl] [<>] ? dbuf_rele+0x3e/0x90 [zfs] [<>] ? zap_lookup_norm+0xbe/0xe0 [zfs] [<>] ? zap_lookup+0x57/0x70 [zfs] [<>] ? vdev_obsolete_sm_object+0x102/0x12b [zfs] [<>] vdev_indirect_sync_obsolete+0x3e1/0x64d [zfs] [<>] ? txg_verify+0x1d/0x160 [zfs] [<>] ? dmu_tx_create_dd+0x80/0xc0 [zfs] [<>] vdev_sync+0xbf/0x550 [zfs] [<>] ? mutex_lock+0x10/0x30 [<>] ? txg_list_remove+0x9f/0x1a0 [zfs] [<>] ? 
zap_contains+0x4d/0x70 [zfs] [<>] spa_sync+0x9f1/0x1b10 [zfs] ... [<>] ? kthread_stop+0x110/0x110 This commit simply corrects the "integer_size" parameter used to lookup the vdev's ZAP object. Reviewed-by: Giuseppe Di Natale Reviewed-by: Igor Kozhukhov Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #8790 --- module/zfs/vdev_indirect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 68dfe83128a..4d18e33c0ab 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -15,6 +15,7 @@ /* * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. */ #include @@ -902,7 +903,7 @@ vdev_obsolete_sm_object(vdev_t *vd, uint64_t *sm_obj) } int error = zap_lookup(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, - VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, sizeof (sm_obj), 1, sm_obj); + VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, sizeof (uint64_t), 1, sm_obj); if (error == ENOENT) { *sm_obj = 0; error = 0; From 4d7cb872e8d37e546b7d4364fe604d6fbe9094a9 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sat, 25 May 2019 04:26:18 +0900 Subject: [PATCH 005/325] Linux 2.6.39 compat: Test if kstrtoul() exists kstrtoul() exists only after torvalds/linux@33ee3b2e2eb9 in 2.6.39. Use strict_strtoul() if kstrtoul() doesn't exist. Note that strict_strtoul() has existed as an alias for kstrtoul() for a while, but removed in torvalds/linux@3db2e9cdc085. It looks like RHEL6 (2.6.32 based) has backported kstrtoul(), and this caused build CI to pass compilation test. It should fail on vanilla < 2.6.39 kernels or distro kernels without backport as reported in #8760. -- # grep "kstrtoul(" /lib/modules/2.6.32-754.12.1.el6.x86_64/build/ \ include/linux/kernel.h >/dev/null # echo $? 
0 Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Signed-off-by: Tomohiro Kusumi Closes #8760 Closes #8761 --- config/kernel-kstrtoul.m4 | 21 +++++++++++++++++++++ config/kernel.m4 | 1 + include/spl/sys/strings.h | 4 ++++ 3 files changed, 26 insertions(+) create mode 100644 config/kernel-kstrtoul.m4 diff --git a/config/kernel-kstrtoul.m4 b/config/kernel-kstrtoul.m4 new file mode 100644 index 00000000000..5530e0e2d85 --- /dev/null +++ b/config/kernel-kstrtoul.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # 2.6.39 API change +dnl # +dnl # 33ee3b2e2eb9 kstrto*: converting strings to integers done (hopefully) right +dnl # +dnl # If kstrtoul() doesn't exist, fallback to use strict_strtoul() which has +dnl # existed since 2.6.25. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_KSTRTOUL], [ + AC_MSG_CHECKING([whether kstrtoul() exists]) + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + int ret __attribute__ ((unused)) = kstrtoul(NULL, 10, NULL); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KSTRTOUL, 1, [kstrtoul() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 026a5258f9f..eed712ca81a 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -167,6 +167,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_TOTALHIGH_PAGES ZFS_AC_KERNEL_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE + ZFS_AC_KERNEL_KSTRTOUL AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" diff --git a/include/spl/sys/strings.h b/include/spl/sys/strings.h index 4fb80320635..8b810c9af24 100644 --- a/include/spl/sys/strings.h +++ b/include/spl/sys/strings.h @@ -28,4 +28,8 @@ #define bcopy(src, dest, size) memmove(dest, src, size) #define bcmp(src, dest, size) memcmp((src), (dest), (size_t)(size)) +#ifndef HAVE_KSTRTOUL +#define kstrtoul strict_strtoul +#endif + #endif /* _SPL_SYS_STRINGS_H */ From c6bbacebc8d8679f1aa92b83db32ef7b36fc78ae Mon Sep 17 00:00:00 2001 From: siv0 Date: Fri, 24 May 2019 22:17:50 +0200 Subject: [PATCH 006/325] Fix ksh-path for 
random_readwrite_fixed.ksh The test in zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh is the only file to use /usr/bin/ksh in the shebang. Change it to /bin/ksh for consistency. Reviewed by: John Kennedy Reviewed-by: Giuseppe Di Natale Reviewed-by: Igor Kozhukhov Signed-off-by: Stoiko Ivanov Closes #8779 --- .../zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh b/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh index 38c0669f69e..e368ed23677 100755 --- a/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh +++ b/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh @@ -1,4 +1,4 @@ -#!/usr/bin/ksh +#!/bin/ksh # file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. # You may only use this file in accordance with the terms of version From cd75d5f7104a920126e4f28be9701dd9241ac191 Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 22:54:36 +0200 Subject: [PATCH 007/325] zfs: missing newline character in zfs_do_channel_program() error message This commit simply adds a missing newline ("\n") character to the error message printed by the zfs command when the provided pool parameter can't be found. Reviewed-by: Chris Dunlop Reviewed-by: Giuseppe Di Natale Reviewed-by: Igor Kozhukhov Reviewed-by: George Melikov Signed-off-by: loli10K Closes #8783 --- cmd/zfs/zfs_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 2d97988a028..6929a224636 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -28,6 +28,7 @@ * Copyright 2016 Igor Kozhukhov . * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2019 Datto Inc. 
+ * Copyright (c) 2019, loli10K */ #include @@ -7522,7 +7523,7 @@ zfs_do_channel_program(int argc, char **argv) } if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { - (void) fprintf(stderr, gettext("cannot open pool '%s'"), + (void) fprintf(stderr, gettext("cannot open pool '%s'\n"), poolname); if (fd != 0) (void) close(fd); From ad0157ec915be6e6f44e2e6a2565e764593240ca Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 22:58:12 +0200 Subject: [PATCH 008/325] zfs: don't pretty-print objsetid property The objsetid property, while being stored as a number, is a dataset identifier and should not be pretty-printed. Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Chris Dunlop Signed-off-by: loli10K Closes #8784 --- lib/libzfs/libzfs_dataset.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index de94021a675..e26b32786db 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -31,6 +31,7 @@ * Copyright 2016 Igor Kozhukhov * Copyright 2017-2018 RackTop Systems. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2019, loli10K */ #include @@ -2969,8 +2970,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, case ZFS_PROP_GUID: case ZFS_PROP_CREATETXG: + case ZFS_PROP_OBJSETID: /* - * GUIDs are stored as numbers, but they are identifiers. + * These properties are stored as numbers, but they are + * identifiers. * We don't want them to be pretty printed, because pretty * printing mangles the ID into a truncated and useless value. 
*/ From 8cfa6d4a1c0699973ec050d8b73ad5e776d1588d Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 23:04:08 +0200 Subject: [PATCH 009/325] zfs-tests: verify zfs(8) and zpool(8) help message is under 80 columns This commit updates the ZFS Test Suite to detect incorrect wrapping of both zfs(8) and zpool(8) help message Reviewed by: John Kennedy Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #8785 --- .../tests/functional/cli_user/misc/zfs_001_neg.ksh | 12 ++++++++---- .../tests/functional/cli_user/misc/zpool_001_neg.ksh | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh index 1073a40308b..46171caf9fb 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh @@ -44,16 +44,20 @@ function cleanup { - if [ -e $TEST_BASE_DIR/zfs_001_neg.$$.txt ] + if [ -e "$TEMPFILE" ] then - rm $TEST_BASE_DIR/zfs_001_neg.$$.txt + rm -f "$TEMPFILE" fi } log_onexit cleanup log_assert "zfs shows a usage message when run as a user" -eval "zfs > $TEST_BASE_DIR/zfs_001_neg.$$.txt 2>&1" -log_must grep "usage: zfs command args" $TEST_BASE_DIR/zfs_001_neg.$$.txt +TEMPFILE="$TEST_BASE_DIR/zfs_001_neg.$$.txt" + +eval "zfs > $TEMPFILE 2>&1" +log_must grep "usage: zfs command args" "$TEMPFILE" + +log_must eval "awk '{if (length(\$0) > 80) exit 1}' < $TEMPFILE" log_pass "zfs shows a usage message when run as a user" diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh index af924837ad8..0fddc08b25d 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh @@ -45,16 +45,20 @@ function cleanup { - if [ -e 
$TEST_BASE_DIR/zpool_001_neg.$$.txt ] + if [ -e "$TEMPFILE" ] then - rm $TEST_BASE_DIR/zpool_001_neg.$$.txt + rm -f "$TEMPFILE" fi } +TEMPFILE="$TEST_BASE_DIR/zpool_001_neg.$$.txt" + log_onexit cleanup log_assert "zpool shows a usage message when run as a user" -eval "zpool > $TEST_BASE_DIR/zpool_001_neg.$$.txt 2>&1" -log_must grep "usage: zpool command args" $TEST_BASE_DIR/zpool_001_neg.$$.txt +eval "zpool > $TEMPFILE 2>&1" +log_must grep "usage: zpool command args" "$TEMPFILE" + +log_must eval "awk '{if (length(\$0) > 80) exit 1}' < $TEMPFILE" log_pass "zpool shows a usage message when run as a user" From 438275c9a09c3f0b0b43ea17b1cceca8a83666d4 Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 23:06:53 +0200 Subject: [PATCH 010/325] VERIFY3P() message is missing a space character This commit just reintroduces a [space] character inadvertently removed in a887d653. Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Chris Dunlop Signed-off-by: loli10K Closes #8786 --- include/spl/sys/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/spl/sys/debug.h b/include/spl/sys/debug.h index b17d77d280a..ecda6bcb895 100644 --- a/include/spl/sys/debug.h +++ b/include/spl/sys/debug.h @@ -102,7 +102,7 @@ void spl_dumpstack(void); if (!(_verify3_left OP _verify3_right)) \ spl_panic(__FILE__, __FUNCTION__, __LINE__, \ "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ - "failed (%px" #OP " %px)\n", \ + "failed (%px " #OP " %px)\n", \ (void *) (_verify3_left), \ (void *) (_verify3_right)); \ } while (0) From e0b3689ed5b3035b86adfccec5e5e7f1f8696804 Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 23:12:14 +0200 Subject: [PATCH 011/325] zfs-tests: fix warnings when packaging some .shlib files This change prevents the following warning when packaging some zfs-tests files: *** WARNING: ./usr/src/zfs-0.8.0/tests/zfs-tests/include/zpool_script.shlib is executable but has empty or no shebang, removing executable bit 
Reviewed by: John Kennedy Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Reviewed-by: Giuseppe Di Natale Signed-off-by: loli10K Closes #8787 --- Makefile.am | 3 ++- tests/zfs-tests/include/zpool_script.shlib | 0 .../tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib | 0 3 files changed, 2 insertions(+), 1 deletion(-) mode change 100755 => 100644 tests/zfs-tests/include/zpool_script.shlib mode change 100755 => 100644 tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib diff --git a/Makefile.am b/Makefile.am index b4416c7492f..1ec2514922a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -111,9 +111,10 @@ mancheck: fi testscheck: - @find ${top_srcdir}/tests/zfs-tests/tests -type f \ + @find ${top_srcdir}/tests/zfs-tests -type f \ \( -name '*.ksh' -not -executable \) -o \ \( -name '*.kshlib' -executable \) -o \ + \( -name '*.shlib' -executable \) -o \ \( -name '*.cfg' -executable \) | \ xargs -r stat -c '%A %n' | \ awk '{c++; print} END {if(c>0) exit 1}' diff --git a/tests/zfs-tests/include/zpool_script.shlib b/tests/zfs-tests/include/zpool_script.shlib old mode 100755 new mode 100644 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib old mode 100755 new mode 100644 From df717bb83538f77d0f05e06cea29ab5d82bf6280 Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 24 May 2019 23:16:00 +0200 Subject: [PATCH 012/325] zpool: status -t is not documented in help message This commit adds the undocumented "-t" option to zpool(8) help message. 
Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Chris Dunlop Signed-off-by: loli10K Closes #8782 --- cmd/zpool/zpool_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index f490675cd60..5d319147646 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -389,7 +389,7 @@ get_usage(zpool_help_t idx) "[ ...]\n")); case HELP_STATUS: return (gettext("\tstatus [-c [script1,script2,...]] " - "[-igLpPsvxD] [-T d|u] [pool] ... \n" + "[-igLpPstvxD] [-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" From 8ec352be1f0866858e52f155f5b1a9faaa3ccacf Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sun, 26 May 2019 05:28:56 +0900 Subject: [PATCH 013/325] Linux 5.2 compat: Remove config/kernel-set-fs-pwd.m4 This failed on 5.2-rc1 with "error: unknown" message, for set_fs_pwd() not being visible in both const and non-const tests. This is caused by torvalds/linux@83da1bed86. It's configurable, but we would want to be able to compile with default kbuild setting. set_fs_pwd() has never been exported with exception of some distro kernels, and set_fs_pwd() wasn't used in ZoL to begin with. The test result was used for a spl function vn_set_fs_pwd(). 
Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Signed-off-by: Tomohiro Kusumi Closes #8777 --- config/kernel-set-fs-pwd.m4 | 39 ------------------------------------- config/kernel.m4 | 1 - module/spl/spl-vnode.c | 4 ---- 3 files changed, 44 deletions(-) delete mode 100644 config/kernel-set-fs-pwd.m4 diff --git a/config/kernel-set-fs-pwd.m4 b/config/kernel-set-fs-pwd.m4 deleted file mode 100644 index d5565b42cb5..00000000000 --- a/config/kernel-set-fs-pwd.m4 +++ /dev/null @@ -1,39 +0,0 @@ -dnl # -dnl # 3.9 API change -dnl # set_fs_pwd takes const struct path * -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SET_FS_PWD_WITH_CONST], - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - [AC_MSG_CHECKING([whether set_fs_pwd() requires const struct path *]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - void (*const set_fs_pwd_func) - (struct fs_struct *, const struct path *) - = set_fs_pwd; - ],[ - return 0; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_FS_PWD_WITH_CONST, 1, - [set_fs_pwd() needs const path *]) - ],[ - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - void (*const set_fs_pwd_func) - (struct fs_struct *, struct path *) - = set_fs_pwd; - ],[ - return 0; - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_ERROR(unknown) - ]) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index eed712ca81a..78d3a90a7f6 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -11,7 +11,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CONFIG ZFS_AC_KERNEL_CTL_NAME ZFS_AC_KERNEL_PDE_DATA - ZFS_AC_KERNEL_SET_FS_PWD_WITH_CONST ZFS_AC_KERNEL_2ARGS_VFS_FSYNC ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK ZFS_AC_KERNEL_KUIDGID_T diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c index ef5f6054044..11b5e4e5a2f 100644 --- a/module/spl/spl-vnode.c +++ b/module/spl/spl-vnode.c @@ -643,11 +643,7 @@ EXPORT_SYMBOL(areleasef); static void -#ifdef HAVE_SET_FS_PWD_WITH_CONST -vn_set_fs_pwd(struct fs_struct *fs, const struct path *path) -#else 
vn_set_fs_pwd(struct fs_struct *fs, struct path *path) -#endif /* HAVE_SET_FS_PWD_WITH_CONST */ { struct path old_pwd; From a727f69e521f1bf80ff2926481d918a25e8bae03 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sun, 26 May 2019 05:40:46 +0900 Subject: [PATCH 014/325] Linux 5.2 compat: Fix config/kernel-shrink.m4 test failure "whether ->count_objects callback exists" test failed with "error: error" message for using an incomplete function shrinker_cb(). This is caused by torvalds/linux@83da1bed86. It's configurable, but we would want to be able to compile with default kbuild setting. Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Signed-off-by: Tomohiro Kusumi Closes #8776 --- config/kernel-shrink.m4 | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 37da0ec721a..405cbf42cf3 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -144,7 +144,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ ZFS_LINUX_TRY_COMPILE([ #include - int shrinker_cb(int nr_to_scan, gfp_t gfp_mask); + int shrinker_cb(int nr_to_scan, gfp_t gfp_mask) { + return 0; + } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, @@ -166,8 +168,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ ZFS_LINUX_TRY_COMPILE([ #include - int shrinker_cb(struct shrinker *, int nr_to_scan, - gfp_t gfp_mask); + int shrinker_cb(struct shrinker *shrink, int nr_to_scan, + gfp_t gfp_mask) { + return 0; + } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, @@ -190,8 +194,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ ZFS_LINUX_TRY_COMPILE([ #include - int shrinker_cb(struct shrinker *, - struct shrink_control *sc); + int shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { + return 0; + } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, @@ -215,8 +221,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ #include unsigned long shrinker_cb( - struct 
shrinker *, - struct shrink_control *sc); + struct shrinker *shrink, + struct shrink_control *sc) { + return 0; + } ],[ struct shrinker cache_shrinker = { .count_objects = shrinker_cb, From 2fb37bcadd08d5d7524a370765d926df2f2e18d9 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sun, 26 May 2019 05:42:09 +0900 Subject: [PATCH 015/325] Linux 5.2 compat: Directly call wait_on_page_bit() wait_on_page_writeback() was made GPL only in torvalds/linux@19343b5bdd. Directly call wait_on_page_bit() without using wait_on_page_writeback() interface, given zfs_putpage() is the only caller for now. Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Signed-off-by: Tomohiro Kusumi Closes #8794 --- module/zfs/zfs_vnops.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 41c1bd25575..885d9633b01 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -4526,8 +4526,10 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) unlock_page(pp); rangelock_exit(lr); - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(pp); + if (wbc->sync_mode != WB_SYNC_NONE) { + if (PageWriteback(pp)) + wait_on_page_bit(pp, PG_writeback); + } ZFS_EXIT(zfsvfs); return (0); From b746c397e3fd7e72083d22d7ac1beb911679dac4 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Sat, 25 May 2019 16:46:32 -0400 Subject: [PATCH 016/325] Disable parallel processing for 'zfs mount -l' Currently, 'zfs mount -a' will always attempt to parallelize work related to mounting as best it can. Unfortunately, when the user passes the '-l' option to load keys, this causes all threads to prompt the user for their keys at once, resulting in a confusing and racy user experience. This patch simply disables parallel mounting when using the '-l' flag. 
Reviewed by: Sebastien Roy Reviewed by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8762 Closes #8811 --- cmd/zfs/zfs_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 6929a224636..c85154479cb 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -6622,10 +6622,13 @@ share_mount(int op, int argc, char **argv) /* * libshare isn't mt-safe, so only do the operation in parallel - * if we're mounting. + * if we're mounting. Additionally, the key-loading option must + * be serialized so that we can prompt the user for their keys + * in a consistent manner. */ zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, - share_mount_one_cb, &share_mount_state, op == OP_MOUNT); + share_mount_one_cb, &share_mount_state, + op == OP_MOUNT && !(flags & MS_CRYPT)); ret = share_mount_state.sm_status; for (int i = 0; i < cb.cb_used; i++) From 69ae34076f227478a505438d958578eb30548443 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Sat, 25 May 2019 16:52:23 -0400 Subject: [PATCH 017/325] Fix embedded bp accounting in count_block() Currently, count_block() does not correctly account for the possibility that the bp that is passed to it could be embedded. These blocks shouldn't be counted since the work of scanning these blocks is already handled when the containing block is scanned. This patch simply resolves this issue by returning early in this case. 
Reviewed by: Allan Jude Reviewed-by: Brian Behlendorf Authored-by: Bill Sommerfeld Signed-off-by: Tom Caputi Closes #8800 Closes #8766 --- module/zfs/dsl_scan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index eee122aa6d2..b15c39ac9ca 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -3629,6 +3629,13 @@ count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp) { int i; + /* + * Don't count embedded bp's, since we already did the work of + * scanning these when we scanned the containing block. + */ + if (BP_IS_EMBEDDED(bp)) + return; + /* * Update the spa's stats on how many bytes we have issued. * Sequential scrubs create a zio for each DVA of the bp. Each From 51de7ccb4274b254dc27053ab87d02884d9b9ba3 Mon Sep 17 00:00:00 2001 From: loli10K Date: Tue, 28 May 2019 20:14:58 +0200 Subject: [PATCH 018/325] Endless loop in zpool_do_remove() on platforms with unsigned char On systems where "char" is an unsigned type the value returned by getopt() will never be negative (-1), leading to an endless loop: this issue prevents both 'zpool remove' and 'zstreamdump' from working on some systems. 
Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Reviewed-by: Chris Dunlop Signed-off-by: loli10K Closes #8789 --- cmd/zfs/zfs_main.c | 4 ++-- cmd/zpool/zpool_main.c | 2 +- cmd/zstreamdump/zstreamdump.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index c85154479cb..d75f089acd1 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -2239,7 +2239,7 @@ zfs_do_upgrade(int argc, char **argv) boolean_t showversions = B_FALSE; int ret = 0; upgrade_cbdata_t cb = { 0 }; - signed char c; + int c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ @@ -3933,7 +3933,7 @@ static int zfs_do_snapshot(int argc, char **argv) { int ret = 0; - signed char c; + int c; nvlist_t *props; snap_cbdata_t sd = { 0 }; boolean_t multiple_snaps = B_FALSE; diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 5d319147646..2cb6774b9ad 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -973,7 +973,7 @@ zpool_do_remove(int argc, char **argv) int i, ret = 0; zpool_handle_t *zhp = NULL; boolean_t stop = B_FALSE; - char c; + int c; boolean_t noop = B_FALSE; boolean_t parsable = B_FALSE; diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index ed88729b518..a162eceda58 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -237,7 +237,7 @@ main(int argc, char *argv[]) struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; - char c; + int c; boolean_t verbose = B_FALSE; boolean_t very_verbose = B_FALSE; boolean_t first = B_TRUE; From 0c6206e7f181698fd48a4605abfa29660de277a2 Mon Sep 17 00:00:00 2001 From: Stoiko Ivanov Date: Thu, 23 May 2019 15:22:27 +0200 Subject: [PATCH 019/325] test-runner.py: change shebang to python3 In commit 6e72a5b9b61066146deafda39ab8158c559f5f15 python scripts which work with 
python2 and python3 changed the shebang from /usr/bin/python to /usr/bin/python3. This gets adapted by the build-system on systems which don't provide python3. This commit changes test-runner.py to also use /usr/bin/python3, enabling the change during buildtime and fixing a minor lintian issue for those Debian packages, which depend on a specific python version (python3/python2). Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Reviewed by: John Kennedy Signed-off-by: Stoiko Ivanov Closes #8803 --- tests/test-runner/bin/test-runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-runner/bin/test-runner.py b/tests/test-runner/bin/test-runner.py index ea37e8ab6f8..4d4fd96ad77 100755 --- a/tests/test-runner/bin/test-runner.py +++ b/tests/test-runner/bin/test-runner.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # This file and its contents are supplied under the terms of the From 27b446f799234b13ff3231142a03f1e37065cafd Mon Sep 17 00:00:00 2001 From: Stoiko Ivanov Date: Thu, 23 May 2019 15:32:53 +0200 Subject: [PATCH 020/325] tests: fix cosmetic permission issues during `make install` files in dist_*_SCRIPTS get installed with 0755, those in dist_*_DATA with 0644. This commit moves all .kshlib, .shlib and .cfg files in the testsuite to dist_pkgdata_DATA, and removes the shebang from zpool_import.kshlib. 
This ensures that the files are installed with appropriate permissions and silences some warnings from lintian Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Reviewed by: John Kennedy Signed-off-by: Stoiko Ivanov Closes #8803 --- tests/zfs-tests/tests/functional/alloc_class/Makefile.am | 6 ++++-- .../functional/cli_root/zpool_import/zpool_import.kshlib | 2 -- .../tests/functional/cli_root/zpool_initialize/Makefile.am | 1 - .../tests/functional/cli_root/zpool_trim/Makefile.am | 4 +++- tests/zfs-tests/tests/functional/removal/Makefile.am | 5 ++++- tests/zfs-tests/tests/perf/Makefile.am | 2 +- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am index 073eac9882e..7cffb2eac45 100644 --- a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am +++ b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am @@ -1,7 +1,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/alloc_class dist_pkgdata_SCRIPTS = \ - alloc_class.cfg \ - alloc_class.kshlib \ setup.ksh \ cleanup.ksh \ alloc_class_001_pos.ksh \ @@ -17,3 +15,7 @@ dist_pkgdata_SCRIPTS = \ alloc_class_011_neg.ksh \ alloc_class_012_pos.ksh \ alloc_class_013_pos.ksh + +dist_pkgdata_DATA = \ + alloc_class.cfg \ + alloc_class.kshlib diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib index f53b88f794e..d050145e44f 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib @@ -1,5 +1,3 @@ -#!/bin/ksh - # # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am index a0a0e0b5cfa..2ebc376d9cb 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am @@ -3,7 +3,6 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ zpool_initialize_attach_detach_add_remove.ksh \ zpool_initialize_import_export.ksh \ - zpool_initialize.kshlib \ zpool_initialize_offline_export_import_online.ksh \ zpool_initialize_online_offline.ksh \ zpool_initialize_split.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am index c357eeffb33..d2d3b4ae88b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am @@ -2,7 +2,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_trim dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ - zpool_trim.kshlib \ zpool_trim_attach_detach_add_remove.ksh \ zpool_trim_import_export.ksh \ zpool_trim_multiple.ksh \ @@ -20,3 +19,6 @@ dist_pkgdata_SCRIPTS = \ zpool_trim_unsupported_vdevs.ksh \ zpool_trim_verify_checksums.ksh \ zpool_trim_verify_trimmed.ksh + +dist_pkgdata_DATA = \ + zpool_trim.kshlib diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am index c5d013e7c86..ba42b899aca 100644 --- a/tests/zfs-tests/tests/functional/removal/Makefile.am +++ b/tests/zfs-tests/tests/functional/removal/Makefile.am @@ -28,6 +28,9 @@ dist_pkgdata_SCRIPTS = \ removal_with_send.ksh removal_with_send_recv.ksh \ removal_with_snapshot.ksh removal_with_write.ksh \ removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \ - remove_raidz.ksh remove_expanded.ksh removal.kshlib + remove_raidz.ksh remove_expanded.ksh + 
+dist_pkgdata_DATA = \ + removal.kshlib pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal diff --git a/tests/zfs-tests/tests/perf/Makefile.am b/tests/zfs-tests/tests/perf/Makefile.am index 68dd31ec12b..294b136b385 100644 --- a/tests/zfs-tests/tests/perf/Makefile.am +++ b/tests/zfs-tests/tests/perf/Makefile.am @@ -1,5 +1,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/perf -dist_pkgdata_SCRIPTS = \ +dist_pkgdata_DATA = \ nfs-sample.cfg \ perf.shlib From aaf3b30dcf324fa670c4a2615ca243b4e7a0a08c Mon Sep 17 00:00:00 2001 From: loli10K Date: Wed, 29 May 2019 00:19:50 +0200 Subject: [PATCH 021/325] Double-free of encryption wrapping key due to invalid pool properties This commits fixes a double-free in zfs_ioc_pool_create() triggered by specifying an unsupported combination of properties when creating a pool with encryption enabled. Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Signed-off-by: loli10K Closes #8791 --- module/zfs/zfs_ioctl.c | 21 ++++++++----------- .../zpool_create/zpool_create_encrypted.ksh | 5 +++++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index debe733dab7..f30d0a89441 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1514,6 +1514,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *zplprops = NULL; dsl_crypto_params_t *dcp = NULL; char *spa_name = zc->zc_name; + boolean_t unload_wkey = B_TRUE; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) @@ -1541,11 +1542,8 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); if (nvl) { error = nvlist_dup(nvl, &rootprops, KM_SLEEP); - if (error != 0) { - nvlist_free(config); - nvlist_free(props); - return (error); - } + if (error != 0) + goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } @@ -1553,11 +1551,8 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) &hidden_args); error = 
dsl_crypto_params_create_nvlist(DCP_CMD_NONE, rootprops, hidden_args, &dcp); - if (error != 0) { - nvlist_free(config); - nvlist_free(props); - return (error); - } + if (error != 0) + goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); @@ -1577,15 +1572,17 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) * Set the remaining root properties */ if (!error && (error = zfs_set_prop_nvlist(spa_name, - ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) + ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) { (void) spa_destroy(spa_name); + unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */ + } pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); - dsl_crypto_params_free(dcp, !!error); + dsl_crypto_params_free(dcp, unload_wkey && !!error); return (error); } diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh index aa154d5c65c..e521d8f1cff 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh @@ -45,6 +45,7 @@ # N 1 1 no keyformat given, but crypt off # Y 0 0 no no keyformat specified for new key # Y 0 1 no no keyformat specified for new key +# Y 1 1 no unsupported combination of non-encryption props # Y 1 0 yes new encryption root # Y 1 1 yes new encryption root # @@ -83,6 +84,10 @@ log_mustnot zpool create -O encryption=on $TESTPOOL $DISKS log_mustnot zpool create -O encryption=on -O keylocation=prompt \ $TESTPOOL $DISKS +log_mustnot eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt" \ + "-o feature@lz4_compress=disabled -O compression=lz4 $TESTPOOL $DISKS" + log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ "-O keyformat=passphrase 
$TESTPOOL $DISKS" log_must zpool destroy $TESTPOOL From 580256045b2970778e7543f1c00faa7ac25d3f98 Mon Sep 17 00:00:00 2001 From: madz Date: Wed, 29 May 2019 19:17:25 +0200 Subject: [PATCH 022/325] Fix integer overflow in get_next_chunk() dn->dn_datablksz type is uint32_t and need to be casted to uint64_t to avoid an overflow when the record size is greater than 4 MiB. Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Olivier Mazouffre Closes #8778 Closes #8797 --- module/zfs/dmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 1697a632078..a283b062238 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -719,8 +719,8 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks) uint64_t blks; uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1); /* bytes of data covered by a level-1 indirect block */ - uint64_t iblkrange = - dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); + uint64_t iblkrange = (uint64_t)dn->dn_datablksz * + EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); ASSERT3U(minimum, <=, *start); From a1eaf0dde05b33d54732b4145afc7d7123f4f082 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 29 May 2019 11:35:50 -0700 Subject: [PATCH 023/325] Exclude log device ashift from normal class When opening a log device during import its allocation bias will not yet have been set by vdev_load(). This results in the log device's ashift being incorrectly applied to the maximum ashift of the vdevs in the normal class. Which in turn prevents the removal of any top-level devices due to the ashift check in the spa_vdev_remove_top_check() function. This issue is resolved by including vdev_islog in the check since it will be set correctly during vdev_open(). 
Reviewed-by: Matt Ahrens Reviewed-by: Igor Kozhukhov Signed-off-by: Brian Behlendorf Closes #8735 --- module/zfs/vdev.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 085ae687315..1c4812cd86d 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1852,13 +1852,10 @@ vdev_open(vdev_t *vd) /* * Track the min and max ashift values for normal data devices. - * - * DJB - TBD these should perhaps be tracked per allocation class - * (e.g. spa_min_ashift is used to round up post compression buffers) */ if (vd->vdev_top == vd && vd->vdev_ashift != 0 && vd->vdev_alloc_bias == VDEV_BIAS_NONE && - vd->vdev_aux == NULL) { + vd->vdev_islog == 0 && vd->vdev_aux == NULL) { if (vd->vdev_ashift > spa->spa_max_ashift) spa->spa_max_ashift = vd->vdev_ashift; if (vd->vdev_ashift < spa->spa_min_ashift) From 94866d8309acbf025d1638a0d1cf34254cf799a5 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Thu, 30 May 2019 08:26:46 +0900 Subject: [PATCH 024/325] Add link count test for root inode Add tests for 97aa3ba44("Fix link count of root inode when snapdir is visible") as suggested in #8727. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Tomohiro Kusumi Closes #8732 --- tests/runfiles/linux.run | 2 +- .../tests/functional/link_count/Makefile.am | 3 +- .../link_count/link_count_root_inode.ksh | 119 ++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8219cf42b10..22fc26212c0 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -635,7 +635,7 @@ tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count', tags = ['functional', 'limits'] [tests/functional/link_count] -tests = ['link_count_001'] +tests = ['link_count_001', 'link_count_root_inode.ksh'] tags = ['functional', 'link_count'] [tests/functional/migration] diff --git a/tests/zfs-tests/tests/functional/link_count/Makefile.am b/tests/zfs-tests/tests/functional/link_count/Makefile.am index 669f3c142c8..bfb7154a651 100644 --- a/tests/zfs-tests/tests/functional/link_count/Makefile.am +++ b/tests/zfs-tests/tests/functional/link_count/Makefile.am @@ -2,4 +2,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/link_count dist_pkgdata_SCRIPTS = \ cleanup.ksh \ setup.ksh \ - link_count_001.ksh + link_count_001.ksh \ + link_count_root_inode.ksh diff --git a/tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh b/tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh new file mode 100755 index 00000000000..d2bf30ac37c --- /dev/null +++ b/tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify root inode (directory) has correct link count. +# +# STRATEGY: +# 1. Create pool and fs. +# 2. Test link count of root inode. +# 3. Create directories and test link count of root inode. +# 4. Delete directories and test link count of root inode. +# 5. Create regular file and test link count of root inode. +# 6. Delete regular file and test link count of root inode. +# + +function assert_link_count +{ + typeset dirpath="$1" + typeset value="$2" + + log_must test "$(ls -ld $dirpath | awk '{ print $2 }')" == "$value" +} + +verify_runnable "both" + +log_note "Verify root inode (directory) has correct link count." + +# Delete a directory from link_count_001.ksh. +if [ -d "${TESTDIR}" -a -d "${TESTDIR}/tmp" ]; then + log_must rm -rf ${TESTDIR}/tmp +fi + +# +# Test with hidden '.zfs' directory. +# This also tests general directories. +# +log_note "Testing with snapdir set to hidden (default)" + +for dst in $TESTPOOL $TESTPOOL/$TESTFS +do + typeset mtpt=$(get_prop mountpoint $dst) + log_must zfs set snapdir=hidden $dst + log_must test -d "$mtpt/.zfs" + if test -n "$(ls $mtpt)"; then + ls $mtpt + log_note "$mtpt not empty, skipping" + continue + fi + assert_link_count $mtpt 2 + + log_must mkdir $mtpt/a + assert_link_count $mtpt 3 + log_must rmdir $mtpt/a + assert_link_count $mtpt 2 + + log_must mkdir -p $mtpt/a/b + assert_link_count $mtpt 3 + log_must rmdir $mtpt/a/b + log_must rmdir $mtpt/a + assert_link_count $mtpt 2 + + log_must touch $mtpt/a + assert_link_count $mtpt 2 + log_must rm $mtpt/a + assert_link_count $mtpt 2 +done + +# +# Test with visible '.zfs' directory. 
+# +log_note "Testing with snapdir set to visible" + +for dst in $TESTPOOL $TESTPOOL/$TESTFS +do + typeset mtpt=$(get_prop mountpoint $dst) + log_must zfs set snapdir=visible $dst + log_must test -d "$mtpt/.zfs" + if test -n "$(ls $mtpt)"; then + ls $mtpt + log_note "$mtpt not empty, skipping" + continue + fi + assert_link_count $mtpt 3 + + log_must mkdir $mtpt/a + assert_link_count $mtpt 4 + log_must rmdir $mtpt/a + assert_link_count $mtpt 3 + + log_must mkdir -p $mtpt/a/b + assert_link_count $mtpt 4 + log_must rmdir $mtpt/a/b + log_must rmdir $mtpt/a + assert_link_count $mtpt 3 + + log_must touch $mtpt/a + assert_link_count $mtpt 3 + log_must rm $mtpt/a + assert_link_count $mtpt 3 +done + +log_pass "Verify root inode (directory) has correct link count passed" From 4f8eef29e080834613539a5b80910a874b15a76d Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 30 May 2019 17:13:18 -0700 Subject: [PATCH 025/325] Revert "Report holes when there are only metadata changes" This reverts commit ec4f9b8f30 which introduced a narrow race which can lead to lseek(, SEEK_DATA) incorrectly returning ENXIO. Resolve the issue by revering this change to restore the previous behavior which depends solely on checking the dirty list. Reviewed-by: Olaf Faaland Reviewed-by: Igor Kozhukhov Signed-off-by: Brian Behlendorf Closes #8816 Closes #8834 --- module/zfs/dmu.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index a283b062238..2d6740576bb 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2373,39 +2373,14 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) return (err); /* - * Check if there are dirty data blocks or frees which have not been - * synced. Dirty spill and bonus blocks which are external to the - * object can ignored when reporting holes. 
+ * Check if dnode is dirty */ - mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { if (multilist_link_active(&dn->dn_dirty_link[i])) { - - if (dn->dn_free_ranges[i] != NULL) { - clean = B_FALSE; - break; - } - - list_t *list = &dn->dn_dirty_records[i]; - dbuf_dirty_record_t *dr; - - for (dr = list_head(list); dr != NULL; - dr = list_next(list, dr)) { - dmu_buf_impl_t *db = dr->dr_dbuf; - - if (db->db_blkid == DMU_SPILL_BLKID || - db->db_blkid == DMU_BONUS_BLKID) - continue; - - clean = B_FALSE; - break; - } - } - - if (clean == B_FALSE) + clean = B_FALSE; break; + } } - mutex_exit(&dn->dn_mtx); /* * If compatibility option is on, sync any current changes before From 11ad06d1d8fa6e71fe33b5f97f4c763ea24bdd24 Mon Sep 17 00:00:00 2001 From: TulsiJain Date: Tue, 28 May 2019 14:14:23 -0700 Subject: [PATCH 026/325] Make zfs_async_block_max_blocks handle zero correctly Reviewed-by: Matt Ahrens Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: TulsiJain Closes #8829 Closes #8289 --- module/zfs/dsl_scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index b15c39ac9ca..04a439fad5c 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -3025,8 +3025,10 @@ dsl_scan_async_block_should_pause(dsl_scan_t *scn) if (zfs_recover) return (B_FALSE); - if (scn->scn_visited_this_txg >= zfs_async_block_max_blocks) + if (zfs_async_block_max_blocks != 0 && + scn->scn_visited_this_txg >= zfs_async_block_max_blocks) { return (B_TRUE); + } elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || From 58b2de6420abfc60abcb280f82521a01ff8422af Mon Sep 17 00:00:00 2001 From: DeHackEd Date: Mon, 3 Jun 2019 23:54:43 -0400 Subject: [PATCH 027/325] Wait in 'S' state when send/recv pipe is blocking Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: DHE Closes #8733 Closes #8752 --- 
module/zfs/bqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/bqueue.c b/module/zfs/bqueue.c index f30253d24bf..3fc7fcaaada 100644 --- a/module/zfs/bqueue.c +++ b/module/zfs/bqueue.c @@ -73,7 +73,7 @@ bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size) mutex_enter(&q->bq_lock); obj2node(q, data)->bqn_size = item_size; while (q->bq_size + item_size > q->bq_maxsize) { - cv_wait(&q->bq_add_cv, &q->bq_lock); + cv_wait_sig(&q->bq_add_cv, &q->bq_lock); } q->bq_size += item_size; list_insert_tail(&q->bq_list, data); @@ -91,7 +91,7 @@ bqueue_dequeue(bqueue_t *q) uint64_t item_size; mutex_enter(&q->bq_lock); while (q->bq_size == 0) { - cv_wait(&q->bq_pop_cv, &q->bq_lock); + cv_wait_sig(&q->bq_pop_cv, &q->bq_lock); } ret = list_remove_head(&q->bq_list); ASSERT3P(ret, !=, NULL); From d6920fb99677b93f3f2d94a9f34bdf9c3268bf21 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Tue, 4 Jun 2019 18:05:46 -0700 Subject: [PATCH 028/325] Make Python detection optional and more portable Previously, --without-python would cause ./configure to fail. Now it is able to proceed, and the Python scripts will not be built. Use portable parameter expansion matching instead of nonstandard substring matching to detect the Python version. This test is duplicated in several places, so define a function for it. Don't assume the full path to binaries, since different platforms do install things in different places. Use AC_CHECK_PROGS instead. When building without Python, also build without pyzfs. Sponsored by: iXsystems, Inc. 
Reviewed-by: Brian Behlendorf Reviewed-by: Richard Laager Reviewed-by: Eli Schwartz Signed-off-by: Ryan Moeller Closes #8809 Closes #8731 --- cmd/Makefile.am | 9 +++++++-- config/always-python.m4 | 43 +++++++++++++++++++++++------------------ config/always-pyzfs.m4 | 11 ++++++++--- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 9dd7b8b4f07..0d990789b0c 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,3 +1,8 @@ SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest -SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat dbufstat zed -SUBDIRS += arc_summary raidz_test zgenhostid +SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid + +if USING_PYTHON +SUBDIRS += arcstat arc_summary dbufstat +endif + +SUBDIRS += mount_zfs zed zvol_id diff --git a/config/always-python.m4 b/config/always-python.m4 index 858ab7b0158..7cfefd9ebca 100644 --- a/config/always-python.m4 +++ b/config/always-python.m4 @@ -12,6 +12,17 @@ AC_DEFUN([ZFS_AC_PYTHON_VERSION], [ ]) ]) +dnl # +dnl # ZFS_AC_PYTHON_VERSION_IS_2 +dnl # ZFS_AC_PYTHON_VERSION_IS_3 +dnl # +dnl # Tests if the $PYTHON_VERSION matches 2.x or 3.x. 
+dnl # +AC_DEFUN([ZFS_AC_PYTHON_VERSION_IS_2], + [test "${PYTHON_VERSION%%\.*}" = "2"]) +AC_DEFUN([ZFS_AC_PYTHON_VERSION_IS_3], + [test "${PYTHON_VERSION%%\.*}" = "3"]) + dnl # dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) dnl # @@ -46,42 +57,36 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ [with_python=check]) AS_CASE([$with_python], - [check], - [AS_IF([test -x /usr/bin/python3], - [PYTHON="python3"], - [AS_IF([test -x /usr/bin/python2], - [PYTHON="python2"], - [PYTHON=""] - )] - )], + [check], [AC_CHECK_PROGS([PYTHON], [python3 python2], [:])], [2*], [PYTHON="python${with_python}"], [*python2*], [PYTHON="${with_python}"], [3*], [PYTHON="python${with_python}"], [*python3*], [PYTHON="${with_python}"], - [no], [PYTHON=""], + [no], [PYTHON=":"], [AC_MSG_ERROR([Unknown --with-python value '$with_python'])] ) - AS_IF([$PYTHON --version >/dev/null 2>&1], [ /bin/true ], [ - AC_MSG_ERROR([Cannot find $PYTHON in your system path]) + AS_IF([test $PYTHON != :], [ + AS_IF([$PYTHON --version >/dev/null 2>&1], + [AM_PATH_PYTHON([2.6], [], [:])], + [AC_MSG_ERROR([Cannot find $PYTHON in your system path])] + ) ]) - - AM_PATH_PYTHON([2.6], [], [:]) AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) - AM_CONDITIONAL([USING_PYTHON_2], [test "${PYTHON_VERSION:0:2}" = "2."]) - AM_CONDITIONAL([USING_PYTHON_3], [test "${PYTHON_VERSION:0:2}" = "3."]) + AM_CONDITIONAL([USING_PYTHON_2], [ZFS_AC_PYTHON_VERSION_IS_2]) + AM_CONDITIONAL([USING_PYTHON_3], [ZFS_AC_PYTHON_VERSION_IS_3]) dnl # dnl # Minimum supported Python versions for utilities: dnl # Python 2.6.x, or Python 3.4.x dnl # - AS_IF([test "${PYTHON_VERSION:0:2}" = "2."], [ - ZFS_AC_PYTHON_VERSION([>= '2.6'], [ /bin/true ], + AS_IF([ZFS_AC_PYTHON_VERSION_IS_2], [ + ZFS_AC_PYTHON_VERSION([>= '2.6'], [ true ], [AC_MSG_ERROR("Python >= 2.6.x is not available")]) ]) - AS_IF([test "${PYTHON_VERSION:0:2}" = "3."], [ - ZFS_AC_PYTHON_VERSION([>= '3.4'], [ /bin/true ], + AS_IF([ZFS_AC_PYTHON_VERSION_IS_3], [ + 
ZFS_AC_PYTHON_VERSION([>= '3.4'], [ true ], [AC_MSG_ERROR("Python >= 3.4.x is not available")]) ]) diff --git a/config/always-pyzfs.m4 b/config/always-pyzfs.m4 index d74d6f1a756..6f32e98feed 100644 --- a/config/always-pyzfs.m4 +++ b/config/always-pyzfs.m4 @@ -18,7 +18,12 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ DEFINE_PYZFS='--without pyzfs' ]) ], [ - DEFINE_PYZFS='' + AS_IF([test $PYTHON != :], [ + DEFINE_PYZFS='' + ], [ + enable_pyzfs=no + DEFINE_PYZFS='--without pyzfs' + ]) ]) AC_SUBST(DEFINE_PYZFS) @@ -26,10 +31,10 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # Require python-devel libraries dnl # AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [ - AS_IF([test "${PYTHON_VERSION:0:2}" = "2."], [ + AS_IF([ZFS_AC_PYTHON_VERSION_IS_2], [ PYTHON_REQUIRED_VERSION=">= '2.7.0'" ], [ - AS_IF([test "${PYTHON_VERSION:0:2}" = "3."], [ + AS_IF([ZFS_AC_PYTHON_VERSION_IS_3], [ PYTHON_REQUIRED_VERSION=">= '3.4.0'" ], [ AC_MSG_ERROR("Python $PYTHON_VERSION unknown") From a0bf24952d1af9d8ac8bdd16f9cf7037c7d9bbf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 4 Jun 2019 21:12:16 -0400 Subject: [PATCH 029/325] Allow TRIM_UNUSED_KSYM when build as a builtin-module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ZFS is built with enable_linux_builtin, it seems to be possible to compile the kernel with TRIM_UNUSED_KSYM. Reviewed-by: Brian Behlendorf Signed-off-by: Torsten Wörtwein Closes #8820 --- config/kernel.m4 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/config/kernel.m4 b/config/kernel.m4 index 78d3a90a7f6..9a36302c048 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -529,10 +529,11 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS], [ AC_MSG_RESULT([yes]) ],[ AC_MSG_RESULT([no]) - AC_MSG_ERROR([ + AS_IF([test "x$enable_linux_builtin" != xyes], [ + AC_MSG_ERROR([ *** This kernel has unused symbols trimming enabled, please disable. 
*** Rebuild the kernel with CONFIG_TRIM_UNUSED_KSYMS=n set.]) - ]) + ])]) ]) dnl # From 02010e9c2c1b3a8944f7c589284a9a2de5086797 Mon Sep 17 00:00:00 2001 From: Peter Wirdemo <4224155+pewo@users.noreply.github.com> Date: Wed, 5 Jun 2019 18:09:17 +0200 Subject: [PATCH 030/325] Fixed a small typo in man/man1/raidz_test.1 Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Chris Dunlop Signed-off-by: Peter Wirdemo Closes #8855 --- man/man1/raidz_test.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man1/raidz_test.1 b/man/man1/raidz_test.1 index 90d858d5bb4..423177a1b83 100644 --- a/man/man1/raidz_test.1 +++ b/man/man1/raidz_test.1 @@ -25,7 +25,7 @@ .TH raidz_test 1 "2016" "ZFS on Linux" "User Commands" .SH NAME -\fBraidz_test\fR \- raidz implementation verification and bencmarking tool +\fBraidz_test\fR \- raidz implementation verification and benchmarking tool .SH SYNOPSIS .LP .BI "raidz_test " From 5108d27aec5280b9f4d2a16c2feeca76d05a5ca6 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Wed, 5 Jun 2019 15:21:25 -0600 Subject: [PATCH 031/325] hkdf_test binary should only have one icp instance The build for test binary hkdf_test was linking both against libicp and libzpool. This results in two instances of libicp inside the binary but the call to icp_init() only initializes one of them! 
Reviewed-by: Richard Elling Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Don Brady Closes #8850 --- tests/zfs-tests/tests/functional/hkdf/Makefile.am | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/zfs-tests/tests/functional/hkdf/Makefile.am b/tests/zfs-tests/tests/functional/hkdf/Makefile.am index d0a68f442fa..3ac26ed21c1 100644 --- a/tests/zfs-tests/tests/functional/hkdf/Makefile.am +++ b/tests/zfs-tests/tests/functional/hkdf/Makefile.am @@ -2,8 +2,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include AM_CPPFLAGS += -I$(top_srcdir)/lib/libspl/include -LDADD = $(top_srcdir)/lib/libicp/libicp.la -LDADD += $(top_srcdir)/lib/libzpool/libzpool.la +LDADD = $(top_srcdir)/lib/libzpool/libzpool.la AUTOMAKE_OPTIONS = subdir-objects From 35050ef39ec1cfeba08f18d4c209fc9a4d3ec041 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 6 Jun 2019 15:59:39 -0400 Subject: [PATCH 032/325] Fix integer overflow of ZTOI(zp)->i_generation The ZFS on-disk format stores each inode's generation ID as a 64 bit number on disk and in-core. However, the Linux kernel's inode is only a 32 bit number. In most places, the code handles this correctly, but the cast is missing in zfs_rezget(). For many pools, this isn't an issue since the generation ID is computed as the current txg when the inode is created and many pools don't have more than 2^32 txgs. For the pools that have more txgs, this issue causes any inode with a high enough generation number to report IO errors after a call to "zfs rollback" while holding the file or directory open. This patch simply adds the missing cast. 
Reviewed-by: Alek Pinchuk Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8858 --- module/zfs/zfs_znode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 77eb8bb9126..d5ed4af7029 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1255,7 +1255,7 @@ zfs_rezget(znode_t *zp) ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime); - if (gen != ZTOI(zp)->i_generation) { + if ((uint32_t)gen != ZTOI(zp)->i_generation) { zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zfsvfs, zh); return (SET_ERROR(EIO)); From 9fd95a2f1bdda6d7d8d947c9658d432d9f7821e3 Mon Sep 17 00:00:00 2001 From: Garrett Fields Date: Thu, 6 Jun 2019 16:04:35 -0400 Subject: [PATCH 033/325] If $ZFS_BOOTFS contains guid, replace the guid portion with $pool Reviewed-by: George Melikov Reviewed-by: Richard Laager Signed-off-by: Garrett Fields Closes #8356 --- contrib/initramfs/scripts/zfs.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 36b7f436c1f..ad604a82ce5 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -878,7 +878,9 @@ mountroot() pool="$("${ZPOOL}" get name,guid -o name,value -H | \ awk -v pool="${ZFS_RPOOL}" '$2 == pool { print $1 }')" if [ -n "$pool" ]; then - ZFS_BOOTFS="${pool}/${ZFS_BOOTFS#*/}" + # If $ZFS_BOOTFS contains guid, replace the guid portion with $pool + ZFS_BOOTFS=$(echo "$ZFS_BOOTFS" | \ + sed -e "s/$("${ZPOOL}" get guid -o value "$pool" -H)/$pool/g") ZFS_RPOOL="${pool}" fi From 8dc8bbde6ed082cb22a3d3a8f662d112e3dafecf Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 6 Jun 2019 16:47:34 -0400 Subject: [PATCH 034/325] Reinstate raw receive check when truncating This patch re-adds a check that was removed in 369aa50. 
The check confirms that a raw receive is not occuring before truncating an object's dn_maxblkid. At the time, it was believed that all cases that would hit this code path would be handled in other places, but that was not the case. Reviewed-by: Matt Ahrens Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8852 Closes #8857 --- module/zfs/dnode_sync.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 581f812a14d..d3acf1baaea 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -384,7 +384,21 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, } } - if (trunc) { + /* + * Do not truncate the maxblkid if we are performing a raw + * receive. The raw receive sets the maxblkid manually and + * must not be overridden. Usually, the last DRR_FREE record + * will be at the maxblkid, because the source system sets + * the maxblkid when truncating. However, if the last block + * was freed by overwriting with zeros and being compressed + * away to a hole, the source system will generate a DRR_FREE + * record while leaving the maxblkid after the end of that + * record. In this case we need to leave the maxblkid as + * indicated in the DRR_OBJECT record, so that it matches the + * source system, ensuring that the cryptographic hashes will + * match. + */ + if (trunc && !dn->dn_objset->os_raw_receive) { ASSERTV(uint64_t off); dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1; From eaa21b2349fadcbbc9ebf48775311898ab27d2fb Mon Sep 17 00:00:00 2001 From: Samuel VERSCHELDE Date: Mon, 10 Jun 2019 18:06:58 +0200 Subject: [PATCH 035/325] Fix %post and %postun generation in kmodtool During zfs-kmod RPM build, $(uname -r) gets unintentionally evaluated on the build host, once and for all. It should be evaluated during the execution of the scriptlets on the installation host. 
Escaping the $ character avoids evaluating it during build. Reviewed-by: Brian Behlendorf Reviewed-by: Olaf Faaland Reviewed-by: Neal Gompa Signed-off-by: Samuel Verschelde Closes #8866 --- scripts/kmodtool | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kmodtool b/scripts/kmodtool index 27a14cdac23..a632dd046b5 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -178,9 +178,9 @@ EOF else cat < /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}/sbin/depmod -a > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -[[ "$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}/sbin/depmod -a > /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}/sbin/depmod -a > /dev/null || : EOF fi From ba505f90d8a1c1c8608fd7ba24b108e8f15b65e0 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Mon, 10 Jun 2019 12:08:53 -0400 Subject: [PATCH 036/325] arc_summary: prefer python3 version and install when there is no python This matches the behavior of other python scripts, such as arcstat and dbufstat, which are always installed but whose install-exec-hook actions will simply touch up the shebang if a python interpreter was configured *and* that interpreter is a python2 interpreter. Fixes installation in a minimal build chroot without python available. 
Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Eli Schwartz Closes #8851 --- cmd/arc_summary/Makefile.am | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cmd/arc_summary/Makefile.am b/cmd/arc_summary/Makefile.am index a83edffadcb..7d83624d66d 100644 --- a/cmd/arc_summary/Makefile.am +++ b/cmd/arc_summary/Makefile.am @@ -4,9 +4,7 @@ if USING_PYTHON_2 dist_bin_SCRIPTS = arc_summary2 install-exec-hook: mv $(DESTDIR)$(bindir)/arc_summary2 $(DESTDIR)$(bindir)/arc_summary -endif - -if USING_PYTHON_3 +else dist_bin_SCRIPTS = arc_summary3 install-exec-hook: mv $(DESTDIR)$(bindir)/arc_summary3 $(DESTDIR)$(bindir)/arc_summary From 581c77e725b3bff3f9539dd06b7ea83d92161abc Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Mon, 10 Jun 2019 12:45:08 -0400 Subject: [PATCH 037/325] Fix incorrect error message for raw receive This patch fixes an incorrect error message that comes up when doing a non-forcing, raw, incremental receive into a dataset that has a newer snapshot than the "from" snapshot. In this case, the current code prints a confusing message about an IVset guid mismatch. This functionality is supported by non-raw receives as an undocumented feature, but was never supported by the raw receive code. If this is desired in the future, we can probably figure out a way to make it work. Reviewed by: Brian Behlendorf Reviewed by: Matthew Ahrens Signed-off-by: Tom Caputi Issue #8758 Closes #8863 --- module/zfs/dmu_recv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 976b1bd4642..65a031b42cc 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -158,9 +158,16 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, } else { /* * If we are not forcing, there must be no - * changes since fromsnap. + * changes since fromsnap. 
Raw sends have an + * additional constraint that requires that + * no "noop" snapshots exist between fromsnap + * and tosnap for the IVset checking code to + * work properly. */ - if (dsl_dataset_modified_since_snap(ds, snap)) { + if (dsl_dataset_modified_since_snap(ds, snap) || + (raw && + dsl_dataset_phys(ds)->ds_prev_snap_obj != + snap->ds_object)) { dsl_dataset_rele(snap, FTAG); return (SET_ERROR(ETXTBSY)); } From 72888812b01edb90f2f5bf6a4c7bcd9aafda951c Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 10 Jun 2019 12:52:25 -0400 Subject: [PATCH 038/325] Fix comparison signedness in arc_is_overflowing() When ARC size is very small, aggsum_lower_bound(&arc_size) may return negative values, that due to unsigned comparison caused delays, waiting for arc_adjust() to "fix" it by calling aggsum_value(&arc_size). Use of signed comparison there fixes the problem. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Alexander Motin Closes #8873 --- module/zfs/arc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 9b500352a4c..3dfa6ca202d 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5480,7 +5480,7 @@ static boolean_t arc_is_overflowing(void) { /* Always allow at least one block of overflow */ - uint64_t overflow = MAX(SPA_MAXBLOCKSIZE, + int64_t overflow = MAX(SPA_MAXBLOCKSIZE, arc_c >> zfs_arc_overflow_shift); /* @@ -5492,7 +5492,7 @@ arc_is_overflowing(void) * in the ARC. In practice, that's in the tens of MB, which is low * enough to be safe. */ - return (aggsum_lower_bound(&arc_size) >= arc_c + overflow); + return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow); } static abd_t * From 63b88f7e223c1061c522762803b1431e7faba5b3 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 14 Jun 2019 09:43:18 -0700 Subject: [PATCH 039/325] Tag zfs-0.8.1 META file and changelog updated. 
Signed-off-by: Tony Hutter --- META | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/META b/META index a93750eebd9..d9285b7732e 100644 --- a/META +++ b/META @@ -1,7 +1,7 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 0.8.0 +Version: 0.8.1 Release: 1 Release-Tags: relext License: CDDL From e0cd6c28a38bb514351eb696e613e0e36755f867 Mon Sep 17 00:00:00 2001 From: Rafael Kitover Date: Thu, 23 May 2019 14:40:28 -0700 Subject: [PATCH 040/325] kernel timer API rework In `config/kernel-timer.m4` refactor slightly to check more generally for the new `timer_setup()` APIs, but also check the callback signature because some kernels (notably 4.14) have the new `timer_setup()` API but use the old callback signature. Also add a check for a `flags` member in `struct timer_list`, which was added in 4.1-rc8. Add compatibility shims to `include/spl/sys/timer.h` to allow using the new timer APIs with the only two caveats being that the callback argument type must be declared as `spl_timer_list_t` and an explicit assignment is required to get the timer variable for the `timer_of()` macro. So the callback would look like this: ```c __cv_wakeup(spl_timer_list_t t) { struct timer_list *tmr = (struct timer_list *)t; struct thing *parent = from_timer(parent, tmr, parent_timer_field); ... /* do stuff with parent */ ``` Make some minor changes to `spl-condvar.c` and `spl-taskq.c` to use the new timer APIs instead of conditional code. 
Reviewed-by: Tomohiro Kusumi Reviewed-by: Brian Behlendorf Signed-off-by: Rafael Kitover Closes #8647 --- config/kernel-timer.m4 | 63 +++++++++++++++++++++++++++++++++------- config/kernel.m4 | 2 +- include/spl/sys/timer.h | 25 ++++++++++++++++ module/spl/spl-condvar.c | 29 +++++++++++++----- module/spl/spl-taskq.c | 24 +++------------ 5 files changed, 103 insertions(+), 40 deletions(-) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index 4dc3f84ed47..b0e1afa153a 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -1,26 +1,51 @@ +dnl # 4.14-rc3 API change +dnl # https://lwn.net/Articles/735887/ dnl # -dnl # 4.15 API change -dnl # https://lkml.org/lkml/2017/11/25/90 dnl # Check if timer_list.func get passed a timer_list or an unsigned long dnl # (older kernels). Also sanity check the from_timer() and timer_setup() dnl # macros are available as well, since they will be used in the same newer dnl # kernels that support the new timer_list.func signature. dnl # -AC_DEFUN([ZFS_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST], [ - AC_MSG_CHECKING([whether timer_list.function gets a timer_list]) +dnl # Also check for the existance of flags in struct timer_list, they were +dnl # added in 4.1-rc8 via 0eeda71bc30d. 
+ +AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ + AC_MSG_CHECKING([whether timer_setup() is available]) tmp_flags="$EXTRA_KCFLAGS" EXTRA_KCFLAGS="-Werror" + ZFS_LINUX_TRY_COMPILE([ #include - void task_expire(struct timer_list *tl) {} + + struct my_task_timer { + struct timer_list timer; + int data; + }; + + void task_expire(struct timer_list *tl) + { + struct my_task_timer *task_timer = from_timer(task_timer, tl, timer); + task_timer->data = 42; + } + ],[ + struct my_task_timer task_timer; + timer_setup(&task_timer.timer, task_expire, 0); ],[ - #ifndef from_timer - #error "No from_timer() macro" - #endif + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_TIMER_SETUP, 1, + [timer_setup() is available]) + ],[ + AC_MSG_RESULT(no) + ]) - struct timer_list timer; - timer.function = task_expire; - timer_setup(&timer, NULL, 0); + AC_MSG_CHECKING([whether timer function expects timer_list]) + + ZFS_LINUX_TRY_COMPILE([ + #include + void task_expire(struct timer_list *tl) {} + ],[ + struct timer_list tl; + tl.function = task_expire; ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1, @@ -28,5 +53,21 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST], [ ],[ AC_MSG_RESULT(no) ]) + + AC_MSG_CHECKING([whether struct timer_list has flags]) + + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + struct timer_list tl; + tl.flags = 2; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_TIMER_LIST_FLAGS, 1, + [struct timer_list has a flags member]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 9a36302c048..fbc04bdf7d7 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -36,7 +36,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_GROUP_INFO_GID ZFS_AC_KERNEL_WRITE ZFS_AC_KERNEL_READ - ZFS_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST + ZFS_AC_KERNEL_TIMER_SETUP ZFS_AC_KERNEL_DECLARE_EVENT_CLASS ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS diff --git a/include/spl/sys/timer.h 
b/include/spl/sys/timer.h index a6b134570cd..31d89d3b97d 100644 --- a/include/spl/sys/timer.h +++ b/include/spl/sys/timer.h @@ -72,4 +72,29 @@ usleep_range(unsigned long min, unsigned long max) #define USEC_TO_TICK(us) usecs_to_jiffies(us) #define NSEC_TO_TICK(ns) usecs_to_jiffies(ns / NSEC_PER_USEC) +#ifndef from_timer +#define from_timer(var, timer, timer_field) \ + container_of(timer, typeof(*var), timer_field) +#endif + +#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST +typedef struct timer_list *spl_timer_list_t; +#else +typedef unsigned long spl_timer_list_t; +#endif + +#ifndef HAVE_KERNEL_TIMER_SETUP + +static inline void +timer_setup(struct timer_list *timer, void (*func)(spl_timer_list_t), u32 fl) +{ +#ifdef HAVE_KERNEL_TIMER_LIST_FLAGS + (timer)->flags = fl; +#endif + init_timer(timer); + setup_timer(timer, func, (spl_timer_list_t)(timer)); +} + +#endif /* HAVE_KERNEL_TIMER_SETUP */ + #endif /* _SPL_TIMER_H */ diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 1e6e38b7874..a7a9d1db9a9 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -154,26 +154,39 @@ EXPORT_SYMBOL(__cv_wait_sig); #if defined(HAVE_IO_SCHEDULE_TIMEOUT) #define spl_io_schedule_timeout(t) io_schedule_timeout(t) #else + +struct spl_task_timer { + struct timer_list timer; + struct task_struct *task; +}; + static void -__cv_wakeup(unsigned long data) +__cv_wakeup(spl_timer_list_t t) { - wake_up_process((struct task_struct *)data); + struct timer_list *tmr = (struct timer_list *)t; + struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer); + + wake_up_process(task_timer->task); } static long spl_io_schedule_timeout(long time_left) { long expire_time = jiffies + time_left; - struct timer_list timer; + struct spl_task_timer task_timer; + struct timer_list *timer = &task_timer.timer; + + task_timer.task = current; - init_timer(&timer); - setup_timer(&timer, __cv_wakeup, (unsigned long)current); - timer.expires = expire_time; - add_timer(&timer); 
+ timer_setup(timer, __cv_wakeup, 0); + + timer->expires = expire_time; + add_timer(timer); io_schedule(); - del_timer_sync(&timer); + del_timer_sync(timer); + time_left = expire_time - jiffies; return (time_left < 0 ? 0 : time_left); diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c index 7684257be7a..a39f94e4cc2 100644 --- a/module/spl/spl-taskq.c +++ b/module/spl/spl-taskq.c @@ -24,6 +24,7 @@ * Solaris Porting Layer (SPL) Task Queue Implementation. */ +#include #include #include #include @@ -242,20 +243,13 @@ task_expire_impl(taskq_ent_t *t) wake_up(&tq->tq_work_waitq); } -#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST static void -task_expire(struct timer_list *tl) +task_expire(spl_timer_list_t tl) { - taskq_ent_t *t = from_timer(t, tl, tqent_timer); + struct timer_list *tmr = (struct timer_list *)tl; + taskq_ent_t *t = from_timer(t, tmr, tqent_timer); task_expire_impl(t); } -#else -static void -task_expire(unsigned long data) -{ - task_expire_impl((taskq_ent_t *)data); -} -#endif /* * Returns the lowest incomplete taskqid_t. 
The taskqid_t may @@ -597,9 +591,6 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; -#ifndef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST - t->tqent_timer.data = 0; -#endif t->tqent_timer.function = NULL; t->tqent_timer.expires = 0; t->tqent_birth = jiffies; @@ -649,9 +640,6 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; -#ifndef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST - t->tqent_timer.data = (unsigned long)t; -#endif t->tqent_timer.function = task_expire; t->tqent_timer.expires = (unsigned long)expire_time; add_timer(&t->tqent_timer); @@ -744,11 +732,7 @@ taskq_init_ent(taskq_ent_t *t) { spin_lock_init(&t->tqent_lock); init_waitqueue_head(&t->tqent_waitq); -#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST timer_setup(&t->tqent_timer, NULL, 0); -#else - init_timer(&t->tqent_timer); -#endif INIT_LIST_HEAD(&t->tqent_list); t->tqent_id = 0; t->tqent_func = NULL; From 4933b0a25b24fbfe79d1495871cd9ed3eeae97ea Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sat, 25 May 2019 08:43:23 +0900 Subject: [PATCH 041/325] Drop local definition of MOUNT_BUSY It's accessible via . 
Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Signed-off-by: Tomohiro Kusumi Closes #8765 --- module/zfs/zfs_ctldir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 46e6e19b91d..c8071a7c215 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -85,6 +85,7 @@ #include #include #include +#include #include "zfs_namecheck.h" /* @@ -1047,8 +1048,6 @@ zfsctl_snapshot_unmount(char *snapname, int flags) return (error); } -#define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */ - int zfsctl_snapshot_mount(struct path *path, int flags) { From e5a877c5d09cd6002cd5375f298570ac38a5b19d Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sun, 26 May 2019 06:29:10 +0900 Subject: [PATCH 042/325] Update descriptions for vnops These descriptions are not uptodate with the code. Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #8767 --- module/zfs/zfs_vnops.c | 20 +++++++++++--------- module/zfs/zfs_znode.c | 7 +++---- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 885d9633b01..9d8a9cbc541 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1676,6 +1676,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, * IN: dip - inode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. + * flags - case flags. * * RETURN: 0 if success * error code if failure @@ -1917,6 +1918,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. + * flags - case flags. * vsecp - ACL to be set * * OUT: ipp - inode of created directory. 
@@ -2235,13 +2237,12 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, } /* - * Read as many directory entries as will fit into the provided - * dirent buffer from the given directory cursor position. + * Read directory entries from the given directory cursor position and emit + * name and position for each entry. * * IN: ip - inode of directory to read. - * dirent - buffer for directory entries. - * - * OUT: dirent - filler buffer of directory entries. + * ctx - directory entry context. + * cr - credentials of caller. * * RETURN: 0 if success * error code if failure @@ -4006,13 +4007,14 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, * Insert the indicated symbolic reference entry into the directory. * * IN: dip - Directory to contain new symbolic link. - * link - Name for new symlink entry. + * name - Name of directory entry in dip. * vap - Attributes of new entry. - * target - Target path of new symlink. - * + * link - Name for new symlink entry. * cr - credentials of caller. * flags - case flags * + * OUT: ipp - Inode for new symbolic link. + * * RETURN: 0 on success, error code on failure. * * Timestamps: @@ -4216,6 +4218,7 @@ zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) * sip - inode of new entry. * name - name of new entry. * cr - credentials of caller. + * flags - case flags. 
* * RETURN: 0 if success * error code if failure @@ -4729,7 +4732,6 @@ zfs_inactive(struct inode *ip) * IN: ip - inode seeking within * ooff - old file offset * noffp - pointer to new file offset - * ct - caller context * * RETURN: 0 if success * EINVAL if new offset invalid diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index d5ed4af7029..a27129b7992 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -651,12 +651,11 @@ static zfs_acl_phys_t acl_phys; * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root + * IS_TMPFILE - new object is of O_TMPFILE * IS_XATTR - new object is an attribute - * bonuslen - length of bonus buffer - * setaclp - File/Dir initial ACL - * fuidp - Tracks fuid allocation. + * acl_ids - ACL related attributes * - * OUT: zpp - allocated znode + * OUT: zpp - allocated znode (set to dzp if IS_ROOT_NODE) * */ void From 90d8067a77977184cbd99d18582984b9a767fb7f Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Tue, 28 May 2019 15:18:31 -0700 Subject: [PATCH 043/325] Update comments to match code s/get_vdev_spec/make_root_vdev The former doesn't exist anymore. Sponsored by: iXsystems, Inc. Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Signed-off-by: Ryan Moeller Closes #8759 --- cmd/zpool/zpool_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 2cb6774b9ad..a3c76030d63 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -785,7 +785,7 @@ add_prop_list_default(const char *propname, char *propval, nvlist_t **props, * -P Display full path for vdev name. * * Adds the given vdevs to 'pool'. As with create, the bulk of this work is - * handled by get_vdev_spec(), which constructs the nvlist needed to pass to + * handled by make_root_vdev(), which constructs the nvlist needed to pass to * libzfs. 
*/ int @@ -883,7 +883,7 @@ zpool_do_add(int argc, char **argv) } } - /* pass off to get_vdev_spec for processing */ + /* pass off to make_root_vdev for processing */ nvroot = make_root_vdev(zhp, props, force, !force, B_FALSE, dryrun, argc, argv); if (nvroot == NULL) { @@ -1232,9 +1232,9 @@ zpool_do_labelclear(int argc, char **argv) * -O Set fsproperty=value in the pool's root file system * * Creates the named pool according to the given vdev specification. The - * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once - * we get the nvlist back from get_vdev_spec(), we either print out the contents - * (if '-n' was specified), or pass it to libzfs to do the creation. + * bulk of the vdev processing is done in make_root_vdev() in zpool_vdev.c. + * Once we get the nvlist back from make_root_vdev(), we either print out the + * contents (if '-n' was specified), or pass it to libzfs to do the creation. */ int zpool_do_create(int argc, char **argv) @@ -1388,7 +1388,7 @@ zpool_do_create(int argc, char **argv) goto errout; } - /* pass off to get_vdev_spec for bulk processing */ + /* pass off to make_root_vdev for bulk processing */ nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, argc - 1, argv + 1); if (nvroot == NULL) From e4a11acfac078b21f1b84c95d8ddb7a99306eb34 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Wed, 29 May 2019 07:31:39 +0900 Subject: [PATCH 044/325] Refactor parent dataset handling in libzfs zfs_rename() For recursive renaming, simplify the code by moving `zhrp` and `parentname` to inner scope. `zhrp` is only used to test existence of a parent dataset for recursive dataset dir scan since ba6a24026c. 
Reviewed by: Brian Behlendorf Reviewed-by: Richard Laager Reviewed-by: Giuseppe Di Natale Signed-off-by: Tomohiro Kusumi Closes #8815 --- lib/libzfs/libzfs_dataset.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index e26b32786db..93af50b99cd 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -4470,8 +4470,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, zfs_cmd_t zc = {"\0"}; char *delim; prop_changelist_t *cl = NULL; - zfs_handle_t *zhrp = NULL; - char *parentname = NULL; char parent[ZFS_MAX_DATASET_NAME_LEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4566,7 +4564,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } if (recursive) { - parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); + zfs_handle_t *zhrp; + char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { ret = -1; goto error; @@ -4574,10 +4573,12 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, delim = strchr(parentname, '@'); *delim = '\0'; zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); + free(parentname); if (zhrp == NULL) { ret = -1; goto error; } + zfs_close(zhrp); } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, CL_GATHER_ITER_MOUNTED, @@ -4650,12 +4651,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } error: - if (parentname != NULL) { - free(parentname); - } - if (zhrp != NULL) { - zfs_close(zhrp); - } if (cl != NULL) { changelist_free(cl); } From 6ce10fdabb0c071b1cf5d7c21564c076d9882ec9 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Tue, 28 May 2019 18:58:32 -0400 Subject: [PATCH 045/325] grammar: it is / plural agreement Reviewed-by: Richard Laager Reviewed-by: Matt Ahrens Reviewed-by: Chris Dunlop Signed-off-by: Josh Soref Closes #8818 --- cmd/zfs/zfs_main.c | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index d75f089acd1..214a437c5dd 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -6733,8 +6733,8 @@ unshare_unmount_compare(const void *larg, const void *rarg, void *unused) /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an - * absolute path, find the entry /proc/self/mounts, verify that its a - * ZFS filesystems, and unmount it appropriately. + * absolute path, find the entry /proc/self/mounts, verify that it's a + * ZFS filesystem, and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) From 328c95e391ed775ab781392ab57cb64200caa928 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Thu, 30 May 2019 08:18:14 +0900 Subject: [PATCH 046/325] Remove vn_set_fs_pwd()/vn_set_pwd() (no need to be at / during insmod) Per suggestion from @behlendorf in #8777, remove vn_set_fs_pwd() and vn_set_pwd() which are only used in zfs_ioctl.c:_init() while loading zfs.ko. The rest of initialization functions being called here after cwd set to / don't depend on cwd of the process except for spa_config_load(). spa_config_load() uses a relative path ".//etc/zfs/zpool.cache" when `rootdir` is non-NULL, which is "/etc/zfs/zpool.cache" given cwd is /, so just unconditionally use the absolute path without "./", so that `vn_set_pwd("/")` as well as the entire functions can be removed. This is also what FreeBSD does. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Tomohiro Kusumi Closes #8826 --- config/kernel-spinlock.m4 | 24 ---------------- config/kernel.m4 | 1 - include/spl/sys/vnode.h | 1 - module/spl/spl-vnode.c | 58 --------------------------------------- module/zfs/spa_config.c | 3 +- module/zfs/zfs_ioctl.c | 7 ----- 6 files changed, 1 insertion(+), 93 deletions(-) delete mode 100644 config/kernel-spinlock.m4 diff --git a/config/kernel-spinlock.m4 b/config/kernel-spinlock.m4 deleted file mode 100644 index d6d6640070b..00000000000 --- a/config/kernel-spinlock.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 2.6.36 API change, -dnl # The 'struct fs_struct->lock' was changed from a rwlock_t to -dnl # a spinlock_t to improve the fastpath performance. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK], [ - AC_MSG_CHECKING([whether struct fs_struct uses spinlock_t]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - static struct fs_struct fs; - spin_lock_init(&fs.lock); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FS_STRUCT_SPINLOCK, 1, - [struct fs_struct uses spinlock_t]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index fbc04bdf7d7..8e89c8014d8 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -12,7 +12,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CTL_NAME ZFS_AC_KERNEL_PDE_DATA ZFS_AC_KERNEL_2ARGS_VFS_FSYNC - ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE diff --git a/include/spl/sys/vnode.h b/include/spl/sys/vnode.h index 71278b08c86..7bd278e4e13 100644 --- a/include/spl/sys/vnode.h +++ b/include/spl/sys/vnode.h @@ -182,7 +182,6 @@ extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, extern file_t *vn_getf(int fd); extern void vn_releasef(int fd); extern void vn_areleasef(int fd, uf_info_t *fip); -extern int 
vn_set_pwd(const char *filename); int spl_vn_init(void); void spl_vn_fini(void); diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c index 11b5e4e5a2f..d9056c964e5 100644 --- a/module/spl/spl-vnode.c +++ b/module/spl/spl-vnode.c @@ -641,64 +641,6 @@ vn_areleasef(int fd, uf_info_t *fip) } /* releasef() */ EXPORT_SYMBOL(areleasef); - -static void -vn_set_fs_pwd(struct fs_struct *fs, struct path *path) -{ - struct path old_pwd; - -#ifdef HAVE_FS_STRUCT_SPINLOCK - spin_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - spin_unlock(&fs->lock); -#else - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); -#endif /* HAVE_FS_STRUCT_SPINLOCK */ - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -int -vn_set_pwd(const char *filename) -{ - struct path path; - mm_segment_t saved_fs; - int rc; - - /* - * user_path_dir() and __user_walk() both expect 'filename' to be - * a user space address so we must briefly increase the data segment - * size to ensure strncpy_from_user() does not fail with -EFAULT. - */ - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = user_path_dir(filename, &path); - if (rc) - goto out; - - rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); - if (rc) - goto dput_and_out; - - vn_set_fs_pwd(current->fs, &path); - -dput_and_out: - path_put(&path); -out: - set_fs(saved_fs); - - return (-rc); -} /* vn_set_pwd() */ -EXPORT_SYMBOL(vn_set_pwd); - static int vn_cache_constructor(void *buf, void *cdrarg, int kmflags) { diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 8616abda37b..6c0894338e2 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -93,8 +93,7 @@ spa_config_load(void) */ pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - (void) snprintf(pathname, MAXPATHLEN, "%s%s", - (rootdir != NULL) ? 
"./" : "", spa_config_path); + (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); file = kobj_open_file(pathname); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index f30d0a89441..c6b55d24f7e 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -7380,13 +7380,6 @@ _init(void) { int error; - error = -vn_set_pwd("/"); - if (error) { - printk(KERN_NOTICE - "ZFS: Warning unable to set pwd to '/': %d\n", error); - return (error); - } - if ((error = -zvol_init()) != 0) return (error); From fafe72712afbbedd9bcf6cd4b3d7b2b2f168b054 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Thu, 6 Jun 2019 06:18:46 +0900 Subject: [PATCH 047/325] Drop objid argument in zfs_znode_alloc() (sync with OpenZFS) Since zfs_znode_alloc() already takes dmu_buf_t*, taking another uint64_t argument for objid is redundant. inode's ->i_ino does and needs to match znode's ->z_id. zfs_znode_alloc() in FreeBSD and illumos doesn't have this argument since vnode doesn't have vnode# in VFS (hence ->z_id exists). 
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Tomohiro Kusumi Closes #8841 --- module/zfs/zfs_znode.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index a27129b7992..3dd29994220 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -515,7 +515,7 @@ zfs_inode_update(znode_t *zp) */ static znode_t * zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, - dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl) + dmu_object_type_t obj_type, sa_handle_t *hdl) { znode_t *zp; struct inode *ip; @@ -596,7 +596,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_ctime, ctime); - ip->i_ino = obj; + ip->i_ino = zp->z_id; zfs_inode_update(zp); zfs_inode_set_ops(zfsvfs, ip); @@ -910,8 +910,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * not fail retry until sufficient memory has been reclaimed. */ do { - *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, obj, - sa_hdl); + *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); } while (*zpp == NULL); VERIFY(*zpp != NULL); @@ -1134,7 +1133,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * bonus buffer. */ zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, - doi.doi_bonus_type, obj_num, NULL); + doi.doi_bonus_type, NULL); if (zp == NULL) { err = SET_ERROR(ENOENT); } else { From b63ed49c2996d3fe400ddd5e032a521cf05a7d10 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 6 Jun 2019 13:08:41 -0700 Subject: [PATCH 048/325] Reduced IOPS when all vdevs are in the zfs_mg_fragmentation_threshold Historically while doing performance testing we've noticed that IOPS can be significantly reduced when all vdevs in the pool are hitting the zfs_mg_fragmentation_threshold percentage. 
Specifically in a hypothetical pool with two vdevs, what can happen is the following: Vdev A would go above that threshold and only vdev B would be used. Then vdev B would pass that threshold but vdev A would go below it (we've been freeing from A to allocate to B). The allocations would go back and forth utilizing one vdev at a time with IOPS taking a hit. Empirically, we've seen that our vdev selection for allocations is good enough that fragmentation increases uniformly across all vdevs the majority of the time. Thus we set the threshold percentage high enough to avoid hitting the speed bump on pools that are being pushed to the edge. We effectively disable its effect in the majority of the cases but we don't remove (at least for now) just in case we hit any weird behavior in the future. Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Signed-off-by: Serapheim Dimitropoulos Closes #8859 --- man/man5/zfs-module-parameters.5 | 2 +- module/zfs/metaslab.c | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 5bca12e06ea..282563f1372 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1817,7 +1817,7 @@ this value. If a metaslab group exceeds this threshold then it will be skipped unless all metaslab groups within the metaslab class have also crossed this threshold. .sp -Default value: \fB85\fR. +Default value: \fB95\fR. .RE .sp diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index ec89810b48a..d1d5a243f40 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -103,12 +103,27 @@ int zfs_mg_noalloc_threshold = 0; /* * Metaslab groups are considered eligible for allocations if their - * fragmenation metric (measured as a percentage) is less than or equal to - * zfs_mg_fragmentation_threshold. 
If a metaslab group exceeds this threshold - * then it will be skipped unless all metaslab groups within the metaslab - * class have also crossed this threshold. + * fragmenation metric (measured as a percentage) is less than or + * equal to zfs_mg_fragmentation_threshold. If a metaslab group + * exceeds this threshold then it will be skipped unless all metaslab + * groups within the metaslab class have also crossed this threshold. + * + * This tunable was introduced to avoid edge cases where we continue + * allocating from very fragmented disks in our pool while other, less + * fragmented disks, exists. On the other hand, if all disks in the + * pool are uniformly approaching the threshold, the threshold can + * be a speed bump in performance, where we keep switching the disks + * that we allocate from (e.g. we allocate some segments from disk A + * making it bypassing the threshold while freeing segments from disk + * B getting its fragmentation below the threshold). + * + * Empirically, we've seen that our vdev selection for allocations is + * good enough that fragmentation increases uniformly across all vdevs + * the majority of the time. Thus we set the threshold percentage high + * enough to avoid hitting the speed bump on pools that are being pushed + * to the edge. 
*/ -int zfs_mg_fragmentation_threshold = 85; +int zfs_mg_fragmentation_threshold = 95; /* * Allow metaslabs to keep their active state as long as their fragmentation From 60cbc18136d8a5c389ec3e6f3da703f30b9687be Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Thu, 6 Jun 2019 16:14:48 -0400 Subject: [PATCH 049/325] l2arc_apply_transforms: Fix typo in comment Reviewed-by: Chris Dunlop Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Richard Laager Signed-off-by: Allan Jude Closes #8822 --- module/zfs/arc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 3dfa6ca202d..946ea3415ed 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -8760,7 +8760,7 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, /* * If this data simply needs its own buffer, we simply allocate it - * and copy the data. This may be done to elimiate a depedency on a + * and copy the data. This may be done to eliminate a dependency on a * shared buffer or to reallocate the buffer to match asize. */ if (HDR_HAS_RABD(hdr) && asize != psize) { From 06900c409ba9dd62ace0fec5aa0558ca4f115f18 Mon Sep 17 00:00:00 2001 From: Jorgen Lundman Date: Fri, 7 Jun 2019 11:01:41 +0900 Subject: [PATCH 050/325] Avoid updating zfs_gitrev.h when rev is unchanged Build process would always re-compile spa_history.c due to touching zfs_gitrev.h - avoid if no change in gitrev. 
Reviewed-by: Brian Behlendorf Reviewed-by: Chris Dunlop Reviewed-by: Allan Jude Signed-off-by: Jorgen Lundman Closes #8860 --- scripts/make_gitrev.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/make_gitrev.sh b/scripts/make_gitrev.sh index bab9be88d73..1cf143794b2 100755 --- a/scripts/make_gitrev.sh +++ b/scripts/make_gitrev.sh @@ -39,3 +39,7 @@ trap cleanup EXIT git rev-parse --git-dir > /dev/null 2>&1 # Get the git current git revision ZFS_GIT_REV=$(git describe --always --long --dirty 2>/dev/null) +# Check if header file already contain the exact string +grep -sq "\"${ZFS_GIT_REV}\"" "$(dirname "$0")"/../include/zfs_gitrev.h && + trap - EXIT +exit 0 From 6f7bc7582539048c2280b7d7892a06e4c7f917f8 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Thu, 6 Jun 2019 19:10:43 -0700 Subject: [PATCH 051/325] Allow metaslab to be unloaded even when not freed from On large systems, the memory used by loaded metaslabs can become a concern. While range trees are a fairly efficient data structure, on heavily fragmented pools they can still consume a significant amount of memory. This problem is amplified when we fail to unload metaslabs that we aren't using. Currently, we only unload a metaslab during metaslab_sync_done; in order for that function to be called on a given metaslab in a given txg, we have to have dirtied that metaslab in that txg. If the dirtying was the result of an allocation, we wouldn't be unloading it (since it wouldn't be 8 txgs since it was selected), so in effect we only unload a metaslab during txgs where it's being freed from. We move the unload logic from sync_done to a new function, and call that function on all metaslabs in a given vdev during vdev_sync_done(). 
Reviewed-by: Richard Elling Reviewed-by: Brian Behlendorf Signed-off-by: Paul Dagnelie Closes #8837 --- include/sys/metaslab.h | 1 + module/zfs/metaslab.c | 47 ++++++++++++++++++++++-------------------- module/zfs/vdev.c | 14 +++++++++++++ 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 2790d06c71d..33090252966 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -50,6 +50,7 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t, void metaslab_fini(metaslab_t *); int metaslab_load(metaslab_t *); +void metaslab_potentially_unload(metaslab_t *, uint64_t); void metaslab_unload(metaslab_t *); uint64_t metaslab_allocated_space(metaslab_t *); diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index d1d5a243f40..41cbaad5f8d 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -2949,6 +2949,30 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) dmu_tx_commit(tx); } +void +metaslab_potentially_unload(metaslab_t *msp, uint64_t txg) +{ + /* + * If the metaslab is loaded and we've not tried to load or allocate + * from it in 'metaslab_unload_delay' txgs, then unload it. 
+ */ + if (msp->ms_loaded && + msp->ms_disabled == 0 && + msp->ms_selected_txg + metaslab_unload_delay < txg) { + for (int t = 1; t < TXG_CONCURRENT_STATES; t++) { + VERIFY0(range_tree_space( + msp->ms_allocating[(txg + t) & TXG_MASK])); + } + if (msp->ms_allocator != -1) { + metaslab_passivate(msp, msp->ms_weight & + ~METASLAB_ACTIVE_MASK); + } + + if (!metaslab_debug_unload) + metaslab_unload(msp); + } +} + /* * Called after a transaction group has completely synced to mark * all of the metaslab's free space as usable. @@ -3086,27 +3110,6 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) */ metaslab_recalculate_weight_and_sort(msp); - /* - * If the metaslab is loaded and we've not tried to load or allocate - * from it in 'metaslab_unload_delay' txgs, then unload it. - */ - if (msp->ms_loaded && - msp->ms_disabled == 0 && - msp->ms_selected_txg + metaslab_unload_delay < txg) { - - for (int t = 1; t < TXG_CONCURRENT_STATES; t++) { - VERIFY0(range_tree_space( - msp->ms_allocating[(txg + t) & TXG_MASK])); - } - if (msp->ms_allocator != -1) { - metaslab_passivate(msp, msp->ms_weight & - ~METASLAB_ACTIVE_MASK); - } - - if (!metaslab_debug_unload) - metaslab_unload(msp); - } - ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK])); ASSERT0(range_tree_space(msp->ms_freeing)); ASSERT0(range_tree_space(msp->ms_freed)); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 1c4812cd86d..81ef87e254a 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -3234,6 +3234,20 @@ vdev_sync_done(vdev_t *vd, uint64_t txg) != NULL) metaslab_sync_done(msp, txg); + /* + * Because this function is only called on dirty vdevs, it's possible + * we won't consider all metaslabs for unloading on every + * txg. However, unless the system is largely idle it is likely that + * we will dirty all vdevs within a few txgs. 
+ */ + for (int i = 0; i < vd->vdev_ms_count; i++) { + msp = vd->vdev_ms[i]; + mutex_enter(&msp->ms_lock); + if (msp->ms_sm != NULL) + metaslab_potentially_unload(msp, txg); + mutex_exit(&msp->ms_lock); + } + if (reassess) metaslab_sync_reassess(vd->vdev_mg); } From c350e62309edc413f9f2312338e5a0b084ebeb8d Mon Sep 17 00:00:00 2001 From: Richard Elling Date: Wed, 5 Jun 2019 16:13:57 -0700 Subject: [PATCH 052/325] Fix logic error in setpartition function Reviewed by: John Kennedy Reviewed-by: Giuseppe Di Natale Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Richard Elling Closes #8839 --- tests/zfs-tests/include/libtest.shlib | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 57d0880cc9b..b3893c2c381 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -861,7 +861,8 @@ function zero_partitions # # best to retire this interface and replace it with something more flexible. # At the moment a best effort is made. # -function set_partition # +# arguments: +function set_partition { typeset -i slicenum=$1 typeset start=$2 @@ -872,6 +873,7 @@ function set_partition # /dev/null + parted $disk -s -- print 1 >/dev/null typeset ret_val=$? if [[ $slicenum -eq 0 || $ret_val -ne 0 ]]; then - parted $DEV_DSKDIR/$disk -s -- mklabel gpt + parted $disk -s -- mklabel gpt if [[ $? -ne 0 ]]; then log_note "Failed to create GPT partition table on $disk" return 1 @@ -899,20 +901,21 @@ function set_partition # /dev/null - block_device_wait + blockdev --rereadpt $disk 2>/dev/null + block_device_wait $disk else if [[ -z $slicenum || -z $size || -z $disk ]]; then log_fail "The slice, size or disk name is unspecified." @@ -932,9 +935,10 @@ function set_partition # > $format_file format -e -s -d $disk -f $format_file + typeset ret_val=$? + rm -f $format_file fi - typeset ret_val=$? 
rm -f $format_file if [[ $ret_val -ne 0 ]]; then log_note "Unable to format $disk slice $slicenum to $size" From a22b00f92480b7341859266176b23c4a801e462b Mon Sep 17 00:00:00 2001 From: Richard Elling Date: Wed, 5 Jun 2019 16:22:04 -0700 Subject: [PATCH 053/325] Remove redundant redundant remove Reviewed by: John Kennedy Reviewed-by: Giuseppe Di Natale Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Richard Elling Closes #8839 --- tests/zfs-tests/include/libtest.shlib | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index b3893c2c381..1b841d7ba02 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -939,7 +939,6 @@ function set_partition rm -f $format_file fi - rm -f $format_file if [[ $ret_val -ne 0 ]]; then log_note "Unable to format $disk slice $slicenum to $size" return 1 From fb52bf9b1daf237e23e49a6ba43eb9d3e300f758 Mon Sep 17 00:00:00 2001 From: Richard Elling Date: Fri, 7 Jun 2019 10:12:42 -0700 Subject: [PATCH 054/325] Block_device_wait does not return an error code Reviewed by: John Kennedy Reviewed-by: Giuseppe Di Natale Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Richard Elling Closes #8839 --- tests/zfs-tests/include/blkdev.shlib | 3 +++ .../tests/functional/rsend/send-wDR_encrypted_zvol.ksh | 4 ++-- tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh | 4 ++-- .../zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh | 2 +- .../zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh | 4 ++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 9cac7184f9f..e9d584af4b6 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -56,6 +56,9 @@ function scan_scsi_hosts # # Wait for newly created block devices to have their minors created. 
# +# Note: there is no meaningful return code if udevadm fails. Consumers +# should not expect a return code (do not call as argument to log_must) +# function block_device_wait { if is_linux; then diff --git a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh index 49b846e9c33..443887bfa23 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh @@ -62,7 +62,7 @@ log_must eval "echo 'password' > $keyfile" log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \ -o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL -log_must block_device_wait +block_device_wait log_must eval "echo 'y' | newfs -t ext4 -v $zdev" log_must mkdir -p $mntpnt @@ -82,7 +82,7 @@ done log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile" log_must eval "zfs recv $TESTPOOL/recv < $sendfile" log_must zfs load-key $TESTPOOL/recv -log_must block_device_wait +block_device_wait log_must mount $recvdev $recvmnt diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh index 2cdcb38dc25..c8a3cbbf43c 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh @@ -86,7 +86,7 @@ log_must zfs create -V 128M $TESTPOOL/$TESTVOL log_must zfs set compression=on $TESTPOOL/$TESTVOL log_must zfs set sync=always $TESTPOOL/$TESTVOL log_must mkdir -p $TESTDIR -log_must block_device_wait +block_device_wait echo "y" | newfs -t ext4 -v $VOLUME log_must mkdir -p $MNTPNT log_must mount -o discard $VOLUME $MNTPNT @@ -149,7 +149,7 @@ log_must zpool export $TESTPOOL # `zpool import -f` because we can't write a frozen pool's labels! 
# log_must zpool import -f $TESTPOOL -log_must block_device_wait +block_device_wait log_must mount $VOLUME $MNTPNT # diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh index 6607d4ca497..1ee7e33c2ac 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh @@ -88,7 +88,7 @@ else fi log_must zfs snapshot -r $snappool -log_must block_device_wait +block_device_wait #verify the snapshot -r results for snap in $snappool $snapfs $snapvol $snapctr $snapctrvol \ diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh index 0f876ad6d61..128b443c6fc 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh @@ -83,7 +83,7 @@ else fi log_must zfs snapshot -r $snappool -log_must block_device_wait +block_device_wait #select the $TESTCTR as destroy point, $TESTCTR is a child of $TESTPOOL log_must zfs destroy -r $snapctr @@ -92,7 +92,7 @@ for snap in $snapctr $snapctrvol $snapctrclone $snapctrfs; do log_fail "The snapshot $snap is not destroyed correctly." done -for snap in $snappool $snapfs $snapvol $ctrfs@$TESTSNAP1;do +for snap in $snappool $snapfs $snapvol $ctrfs@$TESTSNAP1; do ! snapexists $snap && \ log_fail "The snapshot $snap should be not destroyed." done From 4be4dedb9f50edb35b18db4eef5c277bd93d23fa Mon Sep 17 00:00:00 2001 From: Richard Elling Date: Thu, 30 May 2019 16:38:51 -0700 Subject: [PATCH 055/325] Improve ZTS block_device_wait debugging The udevadm settle timeout can be 120 or 180 seconds by default for some distributions. If a long delay is experienced, it could be due to some strangeness in a malfunctioning device that isn't related to the devices under test. 
To help debug this condition, a notice is given if settle takes too long. Arguments can now be passed to block_device_wait. The expected arguments are block device pathnames. Reviewed by: John Kennedy Reviewed-by: Giuseppe Di Natale Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Richard Elling Closes #8839 --- tests/zfs-tests/include/blkdev.shlib | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index e9d584af4b6..ca8807e82c6 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -18,6 +18,7 @@ # Copyright (c) 2017 Lawrence Livermore National Security, LLC. # Copyright (c) 2017 Datto Inc. # Copyright (c) 2017 Open-E, Inc. All Rights Reserved. +# Copyright 2019 Richard Elling # # @@ -55,6 +56,16 @@ function scan_scsi_hosts # # Wait for newly created block devices to have their minors created. +# Additional arguments can be passed to udevadm trigger, with the expected +# arguments to typically be a block device pathname. This is useful when +# checking waiting on a specific device to settle rather than triggering +# all devices and waiting for them all to settle. +# +# The udevadm settle timeout can be 120 or 180 seconds by default for +# some distros. If a long delay is experienced, it could be due to some +# strangeness in a malfunctioning device that isn't related to the devices +# under test. To help debug this condition, a notice is given if settle takes +# too long. # # Note: there is no meaningful return code if udevadm fails. 
Consumers # should not expect a return code (do not call as argument to log_must) @@ -62,8 +73,12 @@ function scan_scsi_hosts function block_device_wait { if is_linux; then - udevadm trigger + udevadm trigger $* + typeset local start=$SECONDS udevadm settle + typeset local elapsed=$((SECONDS - start)) + [[ $elapsed > 60 ]] && \ + log_note udevadm settle time too long: $elapsed fi } From fe11968bbfb6bd825790a51228483f51b3d30d1f Mon Sep 17 00:00:00 2001 From: bnjf Date: Thu, 13 Jun 2019 06:03:33 +1000 Subject: [PATCH 056/325] Fix typo in vdev_raidz_math.c Fix typo in vdev_raidz_math.c Reviewed by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Brad Forschinger Closes #8875 Closes #8880 --- module/zfs/vdev_raidz_math.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index e6112bc0213..3ef67768f91 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -472,7 +472,7 @@ vdev_raidz_math_init(void) return; #endif - /* Fake an zio and run the benchmark on a warmed up buffer */ + /* Fake a zio and run the benchmark on a warmed up buffer */ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP); bench_zio->io_offset = 0; bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */ From 812c36fc711b5f1dc7b41f27761b5e283f16df19 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 12 Jun 2019 13:06:55 -0700 Subject: [PATCH 057/325] Target ARC size can get reduced to arc_c_min Sometimes the target ARC size is reduced to arc_c_min, which impacts performance. We've seen this happen as part of the random_reads performance regression test, where the ARC size is reduced before the reads test starts which impacts how long it takes for system to reach good IOPS performance. We call arc_reduce_target_size when arc_reap_cb_check() returns TRUE, and arc_available_memory() is less than arc_c>>arc_shrink_shift. 
However, arc_available_memory() could easily be low, even when arc_c is low, because we can have tons of unused bufs in the abd kmem cache. This would be especially true just after the DMU requests a bunch of stuff be evicted from the ARC (e.g. due to "zpool export"). To fix this, the ARC should reduce arc_c by the requested amount, not all the way down to arc_size (or arc_c_min), which can be very small. Reviewed-by: Tim Chase Reviewed by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Matthew Ahrens External-issue: DLPX-59431 Closes #8864 --- module/zfs/arc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 946ea3415ed..a7e7d26996f 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -4801,8 +4801,6 @@ arc_reduce_target_size(int64_t to_free) if (c > to_free && c - to_free > arc_c_min) { arc_c = c - to_free; atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); - if (asize < arc_c) - arc_c = MAX(asize, arc_c_min); if (arc_p > arc_c) arc_p = (arc_c >> 1); ASSERT(arc_c >= arc_c_min); From 516a08ebb4e24e09fc9ec39a7204d2f9d20d043d Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 12 Jun 2019 13:13:09 -0700 Subject: [PATCH 058/325] fat zap should prefetch when iterating When iterating over a ZAP object, we're almost always certain to iterate over the entire object. If there are multiple leaf blocks, we can realize a performance win by issuing reads for all the leaf blocks in parallel when the iteration begins. For example, if we have 10,000 snapshots, "zfs destroy -nv pool/fs@1%9999" can take 30 minutes when the cache is cold. This change provides a >3x performance improvement, by issuing the reads for all ~64 blocks of each ZAP object in parallel. 
Reviewed-by: Andreas Dilger Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens External-issue: DLPX-58347 Closes #8862 --- include/sys/zap.h | 7 ++-- man/man5/zfs-module-parameters.5 | 25 ++++++++++++++ module/zfs/ddt_zap.c | 14 +++++++- module/zfs/dmu.c | 16 +++++++++ module/zfs/zap.c | 56 +++++++++++++++++++++++++++++++- module/zfs/zap_micro.c | 31 +++++++++++++++--- 6 files changed, 140 insertions(+), 9 deletions(-) diff --git a/include/sys/zap.h b/include/sys/zap.h index ab13652d8c0..b19b4643879 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. */ @@ -350,6 +350,7 @@ typedef struct zap_cursor { uint64_t zc_serialized; uint64_t zc_hash; uint32_t zc_cd; + boolean_t zc_prefetch; } zap_cursor_t; typedef struct { @@ -375,7 +376,9 @@ typedef struct { * Initialize a zap cursor, pointing to the "first" attribute of the * zapobj. You must _fini the cursor when you are done with it. */ -void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj); +void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); +void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, + uint64_t zapobj); void zap_cursor_fini(zap_cursor_t *zc); /* diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 282563f1372..29374a9d396 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -104,6 +104,18 @@ to a log2 fraction of the target arc size. Default value: \fB6\fR. .RE +.sp +.ne 2 +.na +\fBdmu_prefetch_max\fR (int) +.ad +.RS 12n +Limit the amount we can prefetch with one call to this amount (in bytes). +This helps to limit the amount of memory that can be used by prefetching. +.sp +Default value: \fB134,217,728\fR (128MB). 
+.RE + .sp .ne 2 .na @@ -502,6 +514,19 @@ regular reads (but there's no reason it has to be the same). Default value: \fB32,768\fR. .RE +.sp +.ne 2 +.na +\fBzap_iterate_prefetch\fR (int) +.ad +.RS 12n +If this is set, when we start iterating over a ZAP object, zfs will prefetch +the entire object (all leaf blocks). However, this is limited by +\fBdmu_prefetch_max\fR. +.sp +Use \fB1\fR for on (default) and \fB0\fR for off. +.RE + .sp .ne 2 .na diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index 77c0784cca0..3489d31d9c9 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018 by Delphix. All rights reserved. */ #include @@ -117,7 +118,18 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) zap_attribute_t za; int error; - zap_cursor_init_serialized(&zc, os, object, *walk); + if (*walk == 0) { + /* + * We don't want to prefetch the entire ZAP object, because + * it can be enormous. Also the primary use of DDT iteration + * is for scrubbing, in which case we will be issuing many + * scrub I/Os for each ZAP block that we read in, so + * reading the ZAP is unlikely to be the bottleneck. + */ + zap_cursor_init_noprefetch(&zc, os, object); + } else { + zap_cursor_init_serialized(&zc, os, object, *walk); + } if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { uchar_t cbuf[sizeof (dde->dde_phys) + 1]; uint64_t csize = za.za_num_integers; diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 2d6740576bb..b4131d91781 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -81,6 +81,13 @@ int zfs_dmu_offset_next_sync = 0; */ int zfs_object_remap_one_indirect_delay_ms = 0; +/* + * Limit the amount we can prefetch with one call to this amount. This + * helps to limit the amount of memory that can be used by prefetching. + * Larger objects should be prefetched a bit at a time. 
+ */ +int dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE; + const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" }, @@ -667,6 +674,11 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, return; } + /* + * See comment before the definition of dmu_prefetch_max. + */ + len = MIN(len, dmu_prefetch_max); + /* * XXX - Note, if the dnode for the requested object is not * already cached, we will do a *synchronous* read in the @@ -2629,6 +2641,10 @@ module_param(zfs_dmu_offset_next_sync, int, 0644); MODULE_PARM_DESC(zfs_dmu_offset_next_sync, "Enable forcing txg sync to find holes"); +module_param(dmu_prefetch_max, int, 0644); +MODULE_PARM_DESC(dmu_prefetch_max, + "Limit one prefetch call to this size"); + /* END CSTYLED */ #endif diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 6d8c498042c..30f62ac43b6 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -49,6 +49,36 @@ #include #include +/* + * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object + * (all leaf blocks) when we start iterating over it. + * + * For zap_cursor_init(), the callers all intend to iterate through all the + * entries. There are a few cases where an error (typically i/o error) could + * cause it to bail out early. + * + * For zap_cursor_init_serialized(), there are callers that do the iteration + * outside of ZFS. Typically they would iterate over everything, but we + * don't have control of that. E.g. 
zfs_ioc_snapshot_list_next(), + * zcp_snapshots_iter(), and other iterators over things in the MOS - these + * are called by /sbin/zfs and channel programs. The other example is + * zfs_readdir() which iterates over directory entries for the getdents() + * syscall. /sbin/ls iterates to the end (unless it receives a signal), but + * userland doesn't have to. + * + * Given that the ZAP entries aren't returned in a specific order, the only + * legitimate use cases for partial iteration would be: + * + * 1. Pagination: e.g. you only want to display 100 entries at a time, so you + * get the first 100 and then wait for the user to hit "next page", which + * they may never do). + * + * 2. You want to know if there are more than X entries, without relying on + * the zfs-specific implementation of the directory's st_size (which is + * the number of entries). + */ +int zap_iterate_prefetch = B_TRUE; + int fzap_default_block_shift = 14; /* 16k blocksize */ extern inline zap_phys_t *zap_f_phys(zap_t *zap); @@ -1189,6 +1219,21 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) /* retrieve the next entry at or after zc_hash/zc_cd */ /* if no entry, return ENOENT */ + /* + * If we are reading from the beginning, we're almost certain to + * iterate over the entire ZAP object. If there are multiple leaf + * blocks (freeblk > 2), prefetch the whole object (up to + * dmu_prefetch_max bytes), so that we read the leaf blocks + * concurrently. (Unless noprefetch was requested via + * zap_cursor_init_noprefetch()). 
+ */ + if (zc->zc_hash == 0 && zap_iterate_prefetch && + zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) { + dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0, + zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap), + ZIO_PRIORITY_ASYNC_READ); + } + if (zc->zc_leaf && (ZAP_HASH_IDX(zc->zc_hash, zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) != @@ -1333,3 +1378,12 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs) } } } + +#if defined(_KERNEL) +/* BEGIN CSTYLED */ +module_param(zap_iterate_prefetch, int, 0644); +MODULE_PARM_DESC(zap_iterate_prefetch, + "When iterating ZAP object, prefetch it"); + +/* END CSTYLED */ +#endif diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index fa369f79754..467812ff637 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2017 Nexenta Systems, Inc. */ @@ -1472,9 +1472,9 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, * Routines for iterating over the attributes. */ -void -zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, - uint64_t serialized) +static void +zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized, boolean_t prefetch) { zc->zc_objset = os; zc->zc_zap = NULL; @@ -1483,12 +1483,33 @@ zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, zc->zc_serialized = serialized; zc->zc_hash = 0; zc->zc_cd = 0; + zc->zc_prefetch = prefetch; +} +void +zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized) +{ + zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE); } +/* + * Initialize a cursor at the beginning of the ZAP object. 
The entire + * ZAP object will be prefetched. + */ void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { - zap_cursor_init_serialized(zc, os, zapobj, 0); + zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE); +} + +/* + * Initialize a cursor at the beginning, but request that we not prefetch + * the entire ZAP object. + */ +void +zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) +{ + zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE); } void From 4f809bddc67b152afd9e9a52a01d1af132151a9f Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Thu, 13 Jun 2019 09:15:06 +0900 Subject: [PATCH 059/325] Fix lockdep warning on insmod sysfs_attr_init() is required to make lockdep happy for dynamically allocated sysfs attributes. This fixed #8868 on Fedora 29 running kernel-debug. This requirement was introduced in 2.6.34. See include/linux/sysfs.h for what it actually does. Reviewed-by: Brian Behlendorf Reviewed-by: Olaf Faaland Signed-off-by: Tomohiro Kusumi Closes #8868 Closes #8884 --- module/zfs/zfs_sysfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/module/zfs/zfs_sysfs.c b/module/zfs/zfs_sysfs.c index 30b5edb01e1..2f5bea9aa99 100644 --- a/module/zfs/zfs_sysfs.c +++ b/module/zfs/zfs_sysfs.c @@ -144,6 +144,10 @@ zfs_kobj_release(struct kobject *kobj) zkobj->zko_attr_count = 0; } +#ifndef sysfs_attr_init +#define sysfs_attr_init(attr) do {} while (0) +#endif + static void zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) { @@ -154,6 +158,7 @@ zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) zkobj->zko_attr_list[attr_num].name = attr_name; zkobj->zko_attr_list[attr_num].mode = 0444; zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num]; + sysfs_attr_init(&zkobj->zko_attr_list[attr_num]); } static int From 77e64c6fffa2af6c3b8aeb8b486873a3fca91e53 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 13 Jun 2019 08:48:43 -0700 Subject: [PATCH 060/325] 
ztest: dmu_tx_assign() gets ENOSPC in spa_vdev_remove_thread() When running zloop, we occasionally see the following crash: dmu_tx_assign(tx, TXG_WAIT) == 0 (0x1c == 0) ASSERT at ../../module/zfs/vdev_removal.c:1507:spa_vdev_remove_thread()/sbin/ztest(+0x89c3)[0x55faf567b9c3] The error value 0x1c is ENOSPC. The transaction used by spa_vdev_remove_thread() should not be able to fail due to being out of space. i.e. we should not call dmu_tx_hold_space(). This will allow the removal thread to schedule its work even when the pool is low on space. The "slop space" will provide enough free space to sync out the txg. Reviewed-by: Igor Kozhukhov Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens External-issue: DLPX-37853 Closes #8889 --- module/zfs/vdev_removal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index f2d18d9257b..536a982eca2 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -1498,7 +1498,7 @@ spa_vdev_remove_thread(void *arg) dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); - dmu_tx_hold_space(tx, SPA_MAXBLOCKSIZE); + VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); uint64_t txg = dmu_tx_get_txg(tx); From 19cebf05187d60605ae38ddef9cdf7b10a51deba Mon Sep 17 00:00:00 2001 From: Tulsi Jain Date: Thu, 13 Jun 2019 08:56:15 -0700 Subject: [PATCH 061/325] Restrict filesystem creation if name referred either '.' or '..' This change restricts filesystem creation if the given name contains either '.' or '..' 
Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: Richard Elling Signed-off-by: TulsiJain Closes #8842 Closes #8564 --- include/zfs_namecheck.h | 2 ++ lib/libzfs/libzfs_dataset.c | 10 +++++++++ module/zcommon/zfs_namecheck.c | 21 +++++++++++++++++++ .../zfs_create/zfs_create_009_neg.ksh | 4 +++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/zfs_namecheck.h b/include/zfs_namecheck.h index 527db92b0cf..56d3d36f026 100644 --- a/include/zfs_namecheck.h +++ b/include/zfs_namecheck.h @@ -43,6 +43,8 @@ typedef enum { NAME_ERR_RESERVED, /* entire name is reserved */ NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ NAME_ERR_TOOLONG, /* name is too long */ + NAME_ERR_SELF_REF, /* reserved self path name ('.') */ + NAME_ERR_PARENT_REF, /* reserved parent path name ('..') */ NAME_ERR_NO_AT, /* permission set is missing '@' */ } namecheck_err_t; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 93af50b99cd..3be205f1f43 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -197,6 +197,16 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, "reserved disk name")); break; + case NAME_ERR_SELF_REF: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "self reference, '.' is found in name")); + break; + + case NAME_ERR_PARENT_REF: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent reference, '..' is found in name")); + break; + default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index 58b23b0e00b..b1e0de6d818 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -232,6 +232,27 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what) } } + if (*end == '\0' || *end == '/') { + int component_length = end - start; + /* Validate the contents of this component is not '.' 
*/ + if (component_length == 1) { + if (start[0] == '.') { + if (why) + *why = NAME_ERR_SELF_REF; + return (-1); + } + } + + /* Validate the content of this component is not '..' */ + if (component_length == 2) { + if (start[0] == '.' && start[1] == '.') { + if (why) + *why = NAME_ERR_PARENT_REF; + return (-1); + } + } + } + /* Snapshot or bookmark delimiter found */ if (*end == '@' || *end == '#') { /* Multiple delimiters are not allowed */ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh index b8190626c7b..63f5e595ea3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh @@ -90,7 +90,9 @@ set -A args "$TESTPOOL/" "$TESTPOOL//blah" "$TESTPOOL/@blah" \ "$TESTPOOL/blah*blah" "$TESTPOOL/blah blah" \ "-s $TESTPOOL/$TESTFS1" "-b 1092 $TESTPOOL/$TESTFS1" \ "-b 64k $TESTPOOL/$TESTFS1" "-s -b 32k $TESTPOOL/$TESTFS1" \ - "$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT" + "$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT" \ + "$TESTPOOL/." "$TESTPOOL/.." "$TESTPOOL/../blah" "$TESTPOOL/./blah" \ + "$TESTPOOL/blah/./blah" "$TESTPOOL/blah/../blah" log_assert "Verify 'zfs create ' fails with bad argument." From cab7d856ea619db0d5d17e0a17fedac273f9945d Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 13 Jun 2019 16:08:24 -0400 Subject: [PATCH 062/325] Move write aggregation memory copy out of vq_lock Memory copy is too heavy operation to do under the congested lock. Moving it out reduces congestion by many times to almost invisible. Since the original zio removed from the queue, and the child zio is not executed yet, I don't see why would the copy need protection. My guess it just remained like this from the time when lock was not dropped here, which was added later to fix lock ordering issue. 
Multi-threaded sequential write tests with both HDD and SSD pools with ZVOL block sizes of 4KB, 16KB, 64KB and 128KB all show major reduction of lock congestion, saving from 15% to 35% of CPU time and increasing throughput from 10% to 40%. Reviewed-by: Richard Yao Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #8890 --- module/zfs/vdev_queue.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index e74df76b753..86b20f13483 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -709,6 +709,18 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) do { dio = nio; nio = AVL_NEXT(t, dio); + zio_add_child(dio, aio); + vdev_queue_io_remove(vq, dio); + } while (dio != last); + + /* + * We need to drop the vdev queue's lock during zio_execute() to + * avoid a deadlock that we could encounter due to lock order + * reversal between vq_lock and io_lock in zio_change_priority(). + * Use the dropped lock to do memory copy without congestion. + */ + mutex_exit(&vq->vq_lock); + while ((dio = zio_walk_parents(aio, &zl)) != NULL) { ASSERT3U(dio->io_type, ==, aio->io_type); if (dio->io_flags & ZIO_FLAG_NODATA) { @@ -720,16 +732,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) dio->io_offset - aio->io_offset, 0, dio->io_size); } - zio_add_child(dio, aio); - vdev_queue_io_remove(vq, dio); - } while (dio != last); - - /* - * We need to drop the vdev queue's lock to avoid a deadlock that we - * could encounter since this I/O will complete immediately. 
- */ - mutex_exit(&vq->vq_lock); - while ((dio = zio_walk_parents(aio, &zl)) != NULL) { zio_vdev_io_bypass(dio); zio_execute(dio); } From 592ee2e6ddcad339398e825bdb39569167c550ab Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 13 Jun 2019 13:10:19 -0700 Subject: [PATCH 063/325] compress metadata in later sync passes Starting in sync pass 5 (zfs_sync_pass_dont_compress), we disable compression (including of metadata). Ostensibly this helps the sync passes to converge (i.e. for a sync pass to not need to allocate anything because it is 100% overwrites). However, in practice it increases the average number of sync passes, because when we turn compression off, a lot of block's size will change and thus we have to re-allocate (not overwrite) them. It also increases the number of 128KB allocations (e.g. for indirect blocks and spacemaps) because these will not be compressed. The 128K allocations are especially detrimental to performance on highly fragmented systems, which may have very few free segments of this size, and may need to load new metaslabs to satisfy 128K allocations. We should increase zfs_sync_pass_dont_compress. In practice on a highly fragmented system we see a few 5-pass txg's, a tiny number of 6-pass txg's, and no txg's with more than 6 passes. Reviewed-by: Brian Behlendorf Reviewed-by: Richard Elling Reviewed by: Pavel Zakharov Reviewed-by: Serapheim Dimitropoulos Reviewed-by: George Wilson Signed-off-by: Matthew Ahrens External-issue: DLPX-63431 Closes #8892 --- man/man5/zfs-module-parameters.5 | 16 ++++++++++++++-- module/zfs/zio.c | 18 ++++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 29374a9d396..2d2a79413d9 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2444,9 +2444,21 @@ Default value: \fB25\fR. 
\fBzfs_sync_pass_dont_compress\fR (int) .ad .RS 12n -Don't compress starting in this pass +Starting in this sync pass, we disable compression (including of metadata). +With the default setting, in practice, we don't have this many sync passes, +so this has no effect. +.sp +The original intent was that disabling compression would help the sync passes +to converge. However, in practice disabling compression increases the average +number of sync passes, because when we turn compression off, a lot of block's +size will change and thus we have to re-allocate (not overwrite) them. It +also increases the number of 128KB allocations (e.g. for indirect blocks and +spacemaps) because these will not be compressed. The 128K allocations are +especially detrimental to performance on highly fragmented systems, which may +have very few free segments of this size, and may need to load new metaslabs +to satisfy 128K allocations. .sp -Default value: \fB5\fR. +Default value: \fB8\fR. .RE .sp diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 016ac07eabd..5bfff37eb3b 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -96,9 +96,23 @@ int zio_slow_io_ms = (30 * MILLISEC); * * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that * regular blocks are not deferred. + * + * Starting in sync pass 8 (zfs_sync_pass_dont_compress), we disable + * compression (including of metadata). In practice, we don't have this + * many sync passes, so this has no effect. + * + * The original intent was that disabling compression would help the sync + * passes to converge. 
However, in practice disabling compression increases + * the average number of sync passes, because when we turn compression off, a + * lot of block's size will change and thus we have to re-allocate (not + * overwrite) them. It also increases the number of 128KB allocations (e.g. + * for indirect blocks and spacemaps) because these will not be compressed. + * The 128K allocations are especially detrimental to performance on highly + * fragmented systems, which may have very few free segments of this size, + * and may need to load new metaslabs to satisfy 128K allocations. */ int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */ -int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */ +int zfs_sync_pass_dont_compress = 8; /* don't compress starting in this pass */ int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ /* From 6083f403873f5e427ee8d86f903aa08c7b69daab Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 13 Jun 2019 13:12:39 -0700 Subject: [PATCH 064/325] panic in removal_remap test on 4K devices If the zfs_remove_max_segment tunable is changed to be not a multiple of the sector size, then the device removal code will malfunction and try to create mappings that are smaller than one sector, leading to a panic. On debug bits this assertion will fail in spa_vdev_copy_segment(): ASSERT3U(DVA_GET_ASIZE(&dst), ==, size); On nondebug, the system panics with a stack like: metaslab_free_concrete() metaslab_free_impl() metaslab_free_impl_cb() vdev_indirect_remap() free_from_removing_vdev() metaslab_free_impl() metaslab_free_dva() metaslab_free() Fortunately, the default for zfs_remove_max_segment is 1MB, so this can't occur by default. We hit it during this test because removal_remap.ksh changes zfs_remove_max_segment to 1KB. When testing on 4KB-sector disks, we hit the bug. 
This change makes the zfs_remove_max_segment tunable more robust, automatically rounding it up to a multiple of the sector size. We also turn some key assertions into VERIFY's so that similar bugs would be caught before they are encoded on disk (and thus avoid a panic-reboot-loop). Reviewed-by: Sean Eric Fagan Reviewed-by: Pavel Zakharov Reviewed-by: Serapheim Dimitropoulos Reviewed-by: Sebastien Roy Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens External-issue: DLPX-61342 Closes #8893 --- include/sys/vdev_removal.h | 8 ++++---- man/man5/zfs-module-parameters.5 | 27 ++++++++++++++++++++++++++ module/zfs/vdev_label.c | 5 ++--- module/zfs/vdev_removal.c | 33 +++++++++++++++++++++++++------- 4 files changed, 59 insertions(+), 14 deletions(-) diff --git a/include/sys/vdev_removal.h b/include/sys/vdev_removal.h index 3962237afda..e3bab0658d6 100644 --- a/include/sys/vdev_removal.h +++ b/include/sys/vdev_removal.h @@ -14,7 +14,7 @@ */ /* - * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + * Copyright (c) 2014, 2019 by Delphix. All rights reserved. 
*/ #ifndef _SYS_VDEV_REMOVAL_H @@ -81,13 +81,13 @@ extern void spa_vdev_condense_suspend(spa_t *); extern int spa_vdev_remove(spa_t *, uint64_t, boolean_t); extern void free_from_removing_vdev(vdev_t *, uint64_t, uint64_t); extern int spa_removal_get_stats(spa_t *, pool_removal_stat_t *); -extern void svr_sync(spa_t *spa, dmu_tx_t *tx); +extern void svr_sync(spa_t *, dmu_tx_t *); extern void spa_vdev_remove_suspend(spa_t *); extern int spa_vdev_remove_cancel(spa_t *); -extern void spa_vdev_removal_destroy(spa_vdev_removal_t *svr); +extern void spa_vdev_removal_destroy(spa_vdev_removal_t *); +extern uint64_t spa_remove_max_segment(spa_t *); extern int vdev_removal_max_span; -extern int zfs_remove_max_segment; #ifdef __cplusplus } diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 2d2a79413d9..8ad3ce466ce 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2194,6 +2194,33 @@ pool cannot be returned to a healthy state prior to removing the device. Default value: \fB0\fR. .RE +.sp +.ne 2 +.na +\fBzfs_removal_suspend_progress\fR (int) +.ad +.RS 12n +.sp +This is used by the test suite so that it can ensure that certain actions +happen while in the middle of a removal. +.sp +Default value: \fB0\fR. +.RE + +.sp +.ne 2 +.na +\fBzfs_remove_max_segment\fR (int) +.ad +.RS 12n +.sp +The largest contiguous segment that we will attempt to allocate when removing +a device. This can be no larger than 16MB. If there is a performance +problem with attempting to allocate large blocks, consider decreasing this. +.sp +Default value: \fB16,777,216\fR (16MB). +.RE + .sp .ne 2 .na diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index a0e373b3dfc..6320732ed6d 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -21,8 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -613,7 +612,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * zfs_remove_max_segment, so we need at least one entry * per zfs_remove_max_segment of allocated data. */ - seg_count += to_alloc / zfs_remove_max_segment; + seg_count += to_alloc / spa_remove_max_segment(spa); fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE, seg_count * diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 536a982eca2..6f64edd8c47 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. */ #include @@ -100,6 +100,8 @@ int zfs_remove_max_copy_bytes = 64 * 1024 * 1024; * removing a device. This can be no larger than SPA_MAXBLOCKSIZE. If * there is a performance problem with attempting to allocate large blocks, * consider decreasing this. + * + * See also the accessor function spa_remove_max_segment(). 
*/ int zfs_remove_max_segment = SPA_MAXBLOCKSIZE; @@ -951,8 +953,10 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, vdev_indirect_mapping_entry_t *entry; dva_t dst = {{ 0 }}; uint64_t start = range_tree_min(segs); + ASSERT0(P2PHASE(start, 1 << spa->spa_min_ashift)); ASSERT3U(maxalloc, <=, SPA_MAXBLOCKSIZE); + ASSERT0(P2PHASE(maxalloc, 1 << spa->spa_min_ashift)); uint64_t size = range_tree_span(segs); if (range_tree_span(segs) > maxalloc) { @@ -983,6 +987,7 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, } } ASSERT3U(size, <=, maxalloc); + ASSERT0(P2PHASE(size, 1 << spa->spa_min_ashift)); /* * An allocation class might not have any remaining vdevs or space @@ -1026,11 +1031,11 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, /* * We can't have any padding of the allocated size, otherwise we will - * misunderstand what's allocated, and the size of the mapping. - * The caller ensures this will be true by passing in a size that is - * aligned to the worst (highest) ashift in the pool. + * misunderstand what's allocated, and the size of the mapping. We + * prevent padding by ensuring that all devices in the pool have the + * same ashift, and the allocation size is a multiple of the ashift. */ - ASSERT3U(DVA_GET_ASIZE(&dst), ==, size); + VERIFY3U(DVA_GET_ASIZE(&dst), ==, size); entry = kmem_zalloc(sizeof (vdev_indirect_mapping_entry_t), KM_SLEEP); DVA_MAPPING_SET_SRC_OFFSET(&entry->vime_mapping, start); @@ -1363,6 +1368,20 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, range_tree_destroy(segs); } +/* + * The size of each removal mapping is limited by the tunable + * zfs_remove_max_segment, but we must adjust this to be a multiple of the + * pool's ashift, so that we don't try to split individual sectors regardless + * of the tunable value. (Note that device removal requires that all devices + * have the same ashift, so there's no difference between spa_min_ashift and + * spa_max_ashift.) 
The raw tunable should not be used elsewhere. + */ +uint64_t +spa_remove_max_segment(spa_t *spa) +{ + return (P2ROUNDUP(zfs_remove_max_segment, 1 << spa->spa_max_ashift)); +} + /* * The removal thread operates in open context. It iterates over all * allocated space in the vdev, by loading each metaslab's spacemap. @@ -1385,7 +1404,7 @@ spa_vdev_remove_thread(void *arg) spa_t *spa = arg; spa_vdev_removal_t *svr = spa->spa_vdev_removal; vdev_copy_arg_t vca; - uint64_t max_alloc = zfs_remove_max_segment; + uint64_t max_alloc = spa_remove_max_segment(spa); uint64_t last_txg = 0; spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); @@ -1511,7 +1530,7 @@ spa_vdev_remove_thread(void *arg) vd = vdev_lookup_top(spa, svr->svr_vdev_id); if (txg != last_txg) - max_alloc = zfs_remove_max_segment; + max_alloc = spa_remove_max_segment(spa); last_txg = txg; spa_vdev_copy_impl(vd, svr, &vca, &max_alloc, tx); From b033353b2548a357a7e2bbde2cf68b2ccf8f0054 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 13 Jun 2019 13:14:35 -0700 Subject: [PATCH 065/325] lz4_decompress_abd declared but not defined `lz4_decompress_abd` is declared in zio_compress.h but it is not defined anywhere. The declaration should be removed. Reviewed by: Dan Kimmel Reviewed-by: Allan Jude Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens External-issue: DLPX-47477 Closes #8894 --- include/sys/zio_compress.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/sys/zio_compress.h b/include/sys/zio_compress.h index 1642823d3d4..208117eee4b 100644 --- a/include/sys/zio_compress.h +++ b/include/sys/zio_compress.h @@ -105,8 +105,7 @@ extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len, int level); extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len, int level); -extern int lz4_decompress_abd(abd_t *src, void *dst, size_t s_len, size_t d_len, - int level); + /* * Compress and decompress data if necessary. 
*/ From 9e54b9d930849e2ccb9ae12d729c7f20e54c670f Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 13 Jun 2019 13:15:46 -0700 Subject: [PATCH 066/325] Python config cleanup Don't require Python at configure/build unless building pyzfs. Move ZFS_AC_PYTHON_MODULE to always-pyzfs.m4 where it is used. Make test syntax more consistent. Sponsored by: iXsystems, Inc. Reviewed-by: Neal Gompa Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #8895 --- config/always-python.m4 | 87 +++++++++++------------------------------ config/always-pyzfs.m4 | 45 ++++++++++++++------- 2 files changed, 53 insertions(+), 79 deletions(-) diff --git a/config/always-python.m4 b/config/always-python.m4 index 7cfefd9ebca..c1c07597e68 100644 --- a/config/always-python.m4 +++ b/config/always-python.m4 @@ -1,47 +1,3 @@ -dnl # -dnl # ZFS_AC_PYTHON_VERSION(version, [action-if-true], [action-if-false]) -dnl # -dnl # Verify Python version -dnl # -AC_DEFUN([ZFS_AC_PYTHON_VERSION], [ - ver_check=`$PYTHON -c "import sys; print (sys.version.split()[[0]] $1)"` - AS_IF([test "$ver_check" = "True"], [ - m4_ifvaln([$2], [$2]) - ], [ - m4_ifvaln([$3], [$3]) - ]) -]) - -dnl # -dnl # ZFS_AC_PYTHON_VERSION_IS_2 -dnl # ZFS_AC_PYTHON_VERSION_IS_3 -dnl # -dnl # Tests if the $PYTHON_VERSION matches 2.x or 3.x. -dnl # -AC_DEFUN([ZFS_AC_PYTHON_VERSION_IS_2], - [test "${PYTHON_VERSION%%\.*}" = "2"]) -AC_DEFUN([ZFS_AC_PYTHON_VERSION_IS_3], - [test "${PYTHON_VERSION%%\.*}" = "3"]) - -dnl # -dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) -dnl # -dnl # Checks for Python module. Freely inspired by AX_PYTHON_MODULE -dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html -dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS. 
-dnl # -AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ - PYTHON_NAME=`basename $PYTHON` - AC_MSG_CHECKING([for $PYTHON_NAME module: $1]) - AS_IF([$PYTHON -c "import $1" 2>/dev/null], [ - AC_MSG_RESULT(yes) - m4_ifvaln([$2], [$2]) - ], [ - AC_MSG_RESULT(no) - m4_ifvaln([$3], [$3]) - ]) -]) - dnl # dnl # The majority of the python scripts are written to be compatible dnl # with Python 2.6 and Python 3.4. Therefore, they may be installed @@ -66,35 +22,38 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ [AC_MSG_ERROR([Unknown --with-python value '$with_python'])] ) - AS_IF([test $PYTHON != :], [ - AS_IF([$PYTHON --version >/dev/null 2>&1], - [AM_PATH_PYTHON([2.6], [], [:])], - [AC_MSG_ERROR([Cannot find $PYTHON in your system path])] - ) - ]) - AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) - AM_CONDITIONAL([USING_PYTHON_2], [ZFS_AC_PYTHON_VERSION_IS_2]) - AM_CONDITIONAL([USING_PYTHON_3], [ZFS_AC_PYTHON_VERSION_IS_3]) - dnl # dnl # Minimum supported Python versions for utilities: - dnl # Python 2.6.x, or Python 3.4.x + dnl # Python 2.6 or Python 3.4 dnl # - AS_IF([ZFS_AC_PYTHON_VERSION_IS_2], [ - ZFS_AC_PYTHON_VERSION([>= '2.6'], [ true ], - [AC_MSG_ERROR("Python >= 2.6.x is not available")]) + AM_PATH_PYTHON([], [], [:]) + AS_IF([test -z "$PYTHON_VERSION"], [ + PYTHON_VERSION=$(basename $PYTHON | tr -cd 0-9.) 
]) + PYTHON_MINOR=${PYTHON_VERSION#*\.} - AS_IF([ZFS_AC_PYTHON_VERSION_IS_3], [ - ZFS_AC_PYTHON_VERSION([>= '3.4'], [ true ], - [AC_MSG_ERROR("Python >= 3.4.x is not available")]) - ]) + AS_CASE([$PYTHON_VERSION], + [2.*], [ + AS_IF([test $PYTHON_MINOR -lt 6], + [AC_MSG_ERROR("Python >= 2.6 is required")]) + ], + [3.*], [ + AS_IF([test $PYTHON_MINOR -lt 4], + [AC_MSG_ERROR("Python >= 3.4 is required")]) + ], + [:|2|3], [], + [PYTHON_VERSION=3] + ) + + AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) + AM_CONDITIONAL([USING_PYTHON_2], [test "x${PYTHON_VERSION%%\.*}" = x2]) + AM_CONDITIONAL([USING_PYTHON_3], [test "x${PYTHON_VERSION%%\.*}" = x3]) dnl # dnl # Request that packages be built for a specific Python version. dnl # - AS_IF([test $with_python != check], [ - PYTHON_PKG_VERSION=`echo ${PYTHON} | tr -d 'a-zA-Z.'` + AS_IF([test "x$with_python" != xcheck], [ + PYTHON_PKG_VERSION=$(echo $PYTHON_VERSION | tr -d .) DEFINE_PYTHON_PKG_VERSION='--define "__use_python_pkg_version '${PYTHON_PKG_VERSION}'"' DEFINE_PYTHON_VERSION='--define "__use_python '${PYTHON}'"' ], [ diff --git a/config/always-pyzfs.m4 b/config/always-pyzfs.m4 index 6f32e98feed..f620a8f9a18 100644 --- a/config/always-pyzfs.m4 +++ b/config/always-pyzfs.m4 @@ -1,5 +1,24 @@ dnl # -dnl # Determines if pyzfs can be built, requires Python 2.7 or latter. +dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) +dnl # +dnl # Checks for Python module. Freely inspired by AX_PYTHON_MODULE +dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html +dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS. +dnl # +AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ + PYTHON_NAME=$(basename $PYTHON) + AC_MSG_CHECKING([for $PYTHON_NAME module: $1]) + AS_IF([$PYTHON -c "import $1" 2>/dev/null], [ + AC_MSG_RESULT(yes) + m4_ifvaln([$2], [$2]) + ], [ + AC_MSG_RESULT(no) + m4_ifvaln([$3], [$3]) + ]) +]) + +dnl # +dnl # Determines if pyzfs can be built, requires Python 2.7 or later. 
dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ AC_ARG_ENABLE([pyzfs], @@ -18,7 +37,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ DEFINE_PYZFS='--without pyzfs' ]) ], [ - AS_IF([test $PYTHON != :], [ + AS_IF([test "$PYTHON" != :], [ DEFINE_PYZFS='' ], [ enable_pyzfs=no @@ -31,20 +50,16 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # Require python-devel libraries dnl # AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [ - AS_IF([ZFS_AC_PYTHON_VERSION_IS_2], [ - PYTHON_REQUIRED_VERSION=">= '2.7.0'" - ], [ - AS_IF([ZFS_AC_PYTHON_VERSION_IS_3], [ - PYTHON_REQUIRED_VERSION=">= '3.4.0'" - ], [ - AC_MSG_ERROR("Python $PYTHON_VERSION unknown") - ]) - ]) + AS_CASE([$PYTHON_VERSION], + [3.*], [PYTHON_REQUIRED_VERSION=">= '3.4.0'"], + [2.*], [PYTHON_REQUIRED_VERSION=">= '2.7.0'"], + [AC_MSG_ERROR("Python $PYTHON_VERSION unknown")] + ) AX_PYTHON_DEVEL([$PYTHON_REQUIRED_VERSION], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_REQUIRED_VERSION development library is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -57,7 +72,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ZFS_AC_PYTHON_MODULE([setuptools], [], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_VERSION setuptools is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -70,7 +85,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ZFS_AC_PYTHON_MODULE([cffi], [], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_VERSION cffi is not installed") - ], [test ! 
"x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -81,7 +96,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # AS_IF([test "x$enable_pyzfs" = xcheck], [enable_pyzfs=yes]) - AM_CONDITIONAL([PYZFS_ENABLED], [test x$enable_pyzfs = xyes]) + AM_CONDITIONAL([PYZFS_ENABLED], [test "x$enable_pyzfs" = xyes]) AC_SUBST([PYZFS_ENABLED], [$enable_pyzfs]) AC_SUBST(pythonsitedir, [$PYTHON_SITE_PKG]) From ed7b0d357a070d28710abe9a6c6fc22c4fcbe854 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 14 Jun 2019 17:07:34 -0400 Subject: [PATCH 067/325] Minimize aggsum_compare(&arc_size, arc_c) calls. For busy ARC situation when arc_size close to arc_c is desired. But then it is quite likely that aggsum_compare(&arc_size, arc_c) will need to flush per-CPU buckets to find exact comparison result. Doing that often in a hot path penalizes whole idea of aggsum usage there, since it replaces few simple atomic additions with dozens of lock acquisitions. Replacing aggsum_compare() with aggsum_upper_bound() in code increasing arc_p when ARC is growing (arc_size < arc_c) according to PMC profiles allows to save ~5% of CPU time in aggsum code during sequential write to 12 ZVOLs with 16KB block size on large dual-socket system. I suppose there some minor arc_p behavior change due to lower precision of the new code, but I don't think it is a big deal, since it should affect only very small window in time (aggsum buckets are flushed every second) and in ARC size (buckets are limited to 10 average ARC blocks per CPU). 
Reviewed-by: Chris Dunlop Reviewed-by: Richard Elling Reviewed-by: George Melikov Reviewed-by: Allan Jude Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #8901 --- module/zfs/arc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index a7e7d26996f..720365c4a93 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5606,7 +5606,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) * If we are growing the cache, and we are adding anonymous * data, and we have outgrown arc_p, update arc_p */ - if (aggsum_compare(&arc_size, arc_c) < 0 && + if (aggsum_upper_bound(&arc_size) < arc_c && hdr->b_l1hdr.b_state == arc_anon && (zfs_refcount_count(&arc_anon->arcs_size) + zfs_refcount_count(&arc_mru->arcs_size) > arc_p)) From b5e8d14a4b0c25b19c4e148123e5d579add0cfa5 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 19 Jun 2019 10:39:28 -0700 Subject: [PATCH 068/325] ZTS: Fix mmp_interval failure The mmp_interval test case was failing on Fedora 30 due to the built-in 'echo' command terminating the script when it was unable to write to the sysfs module parameter. This change in behavior was observed with ksh-2020.0.0-alpha1. Resolve the issue by using the external cat command which fails gracefully as expected. Additionally, remove some incorrect quotes around the $? return values. 
Reviewed-by: Giuseppe Di Natale Reviewed-by: Tony Hutter Reviewed-by: Olaf Faaland Reviewed-by: Richard Elling Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #8906 --- tests/zfs-tests/include/libtest.shlib | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 1b841d7ba02..c7cb36a8d0e 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3494,13 +3494,13 @@ function set_tunable_impl Linux) typeset zfs_tunables="/sys/module/$module/parameters" [[ -w "$zfs_tunables/$tunable" ]] || return 1 - echo -n "$value" > "$zfs_tunables/$tunable" - return "$?" + cat >"$zfs_tunables/$tunable" <<<"$value" + return $? ;; SunOS) [[ "$module" -eq "zfs" ]] || return 1 echo "${tunable}/${mdb_cmd}0t${value}" | mdb -kw - return "$?" + return $? ;; esac } @@ -3527,7 +3527,7 @@ function get_tunable_impl typeset zfs_tunables="/sys/module/$module/parameters" [[ -f "$zfs_tunables/$tunable" ]] || return 1 cat $zfs_tunables/$tunable - return "$?" + return $? ;; SunOS) [[ "$module" -eq "zfs" ]] || return 1 From 5b0327bc5795b5ae8b1926d90a9b6b8b10433f72 Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Wed, 19 Jun 2019 11:44:44 -0700 Subject: [PATCH 069/325] kmod-zfs-devel rpm should provide kmod-spl-devel When configure is run with --with-spec=redhat, and rpms are built, the kmod-zfs-devel package is missing Provides: kmod-spl-devel = %{version} which is required by software such as Lustre which builds against zfs kmods. Adding it makes it easier for such software to build against both zfs-0.7 (where SPL is separate and may be missing) and zfs-0.8. 
Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Olaf Faaland Closes #8930 --- rpm/redhat/zfs-kmod.spec.in | 1 + 1 file changed, 1 insertion(+) diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in index 473f2d03250..f632c4867e6 100644 --- a/rpm/redhat/zfs-kmod.spec.in +++ b/rpm/redhat/zfs-kmod.spec.in @@ -41,6 +41,7 @@ This package contains the ZFS kernel modules. %package -n kmod-%{kmod_name}-devel Summary: ZFS kernel module(s) devel common Group: System Environment/Kernel +Provides: kmod-spl-devel = %{version} %description -n kmod-%{kmod_name}-devel This package provides the header files and objects to build kernel modules. From 2087b6cf4941b936583b48471a79b252dc0a9dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Wed, 19 Jun 2019 20:53:37 +0200 Subject: [PATCH 070/325] Fix memory leak in check_disk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Allan Jude Reviewed-by: Tony Hutter Reviewed-by: Richard Elling Signed-off-by: Michael Niewöhner Closes #8897 Closes #8911 --- cmd/zpool/zpool_vdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 7ea9d742006..52c696816f7 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -433,6 +433,7 @@ check_disk(const char *path, blkid_cache cache, int force, char *value = blkid_get_tag_value(cache, "TYPE", path); (void) fprintf(stderr, gettext("%s is in use and contains " "a %s filesystem.\n"), path, value ? value : "unknown"); + free(value); return (-1); } From fb6f6b47d6f9b63e5768635b74160d94b3fe33f5 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Thu, 20 Jun 2019 04:27:31 +0900 Subject: [PATCH 071/325] Use ZFS_DEV macro instead of literals The rest of the code/comments use ZFS_DEV, so sync with that. 
Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Reviewed-by: Richard Elling Signed-off-by: Tomohiro Kusumi Closes #8912 --- lib/libzfs_core/libzfs_core.c | 6 +++--- lib/libzpool/util.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 99fc84d0461..eb332bc94e8 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -52,7 +52,7 @@ * * - Thin Layer. libzfs_core is a thin layer, marshaling arguments * to/from the kernel ioctls. There is generally a 1:1 correspondence - * between libzfs_core functions and ioctls to /dev/zfs. + * between libzfs_core functions and ioctls to ZFS_DEV. * * - Clear Atomicity. Because libzfs_core functions are generally 1:1 * with kernel ioctls, and kernel ioctls are general atomic, each @@ -135,7 +135,7 @@ libzfs_core_init(void) { (void) pthread_mutex_lock(&g_lock); if (g_refcount == 0) { - g_fd = open("/dev/zfs", O_RDWR); + g_fd = open(ZFS_DEV, O_RDWR); if (g_fd < 0) { (void) pthread_mutex_unlock(&g_lock); return (errno); @@ -499,7 +499,7 @@ lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) * The snapshots must all be in the same pool. * The value is the name of the hold (string type). * - * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). + * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). * In this case, when the cleanup_fd is closed (including on process * termination), the holds will be released. 
If the system is shut down * uncleanly, the holds will be released when the pool is next opened diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c index ad05d2239ae..67bc209ceec 100644 --- a/lib/libzpool/util.c +++ b/lib/libzpool/util.c @@ -223,7 +223,7 @@ pool_active(void *unused, const char *name, uint64_t guid, * Use ZFS_IOC_POOL_SYNC to confirm if a pool is active */ - fd = open("/dev/zfs", O_RDWR); + fd = open(ZFS_DEV, O_RDWR); if (fd < 0) return (-1); From 01cc94f68d89c71943ecc5bd3dfaff6171dfe157 Mon Sep 17 00:00:00 2001 From: dacianstremtan <35844628+dacianstremtan@users.noreply.github.com> Date: Thu, 20 Jun 2019 15:27:14 -0400 Subject: [PATCH 072/325] Replace whereis with type in zfs-lib.sh The whereis command should not be used since it may not exist in the initramfs. The dracut plymouth module also uses the type command instead of whereis. Reviewed-by: Brian Behlendorf Reviewed-by: Garrett Fields Signed-off-by: Dacian Reece-Stremtan Closes #8920 Closes #8938 --- contrib/dracut/90zfs/zfs-lib.sh.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/dracut/90zfs/zfs-lib.sh.in b/contrib/dracut/90zfs/zfs-lib.sh.in index 23c07af9e86..44021c6e5fc 100755 --- a/contrib/dracut/90zfs/zfs-lib.sh.in +++ b/contrib/dracut/90zfs/zfs-lib.sh.in @@ -144,7 +144,7 @@ ask_for_password() { { flock -s 9; # Prompt for password with plymouth, if installed and running. - if whereis plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then + if type plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then plymouth ask-for-password \ --prompt "$ply_prompt" --number-of-tries="$ply_tries" \ --command="$ply_cmd" From b96ceeead2a9c7e0973fcef58356defb10f6df26 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 20 Jun 2019 15:29:51 -0400 Subject: [PATCH 073/325] Allow unencrypted children of encrypted datasets When encryption was first added to ZFS, we made a decision to prevent users from creating unencrypted children of encrypted datasets. 
The idea was to prevent users from inadvertently leaving some of their data unencrypted. However, since the release of 0.8.0, some legitimate reasons have been brought up for this behavior to be allowed. This patch simply removes this limitation from all code paths that had checks for it and updates the tests accordingly. Reviewed-by: Jason King Reviewed-by: Sean Eric Fagan Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8737 Closes #8870 --- include/sys/dsl_crypt.h | 1 - lib/libzfs/libzfs_crypto.c | 41 +--------------- lib/libzfs/libzfs_dataset.c | 13 ++--- lib/libzfs/libzfs_sendrecv.c | 48 ++++++++----------- module/zfs/dmu_objset.c | 7 --- module/zfs/dmu_recv.c | 24 +++++----- module/zfs/dsl_crypt.c | 44 +---------------- .../zfs_create/zfs_create_encrypted.ksh | 20 ++++---- .../zfs_receive/zfs_receive_to_encrypted.ksh | 14 +++--- .../zfs_rename/zfs_rename_to_encrypted.ksh | 14 +++--- 10 files changed, 63 insertions(+), 163 deletions(-) diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index c2c0a548a48..0f73ea6c6df 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -209,7 +209,6 @@ void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx); -int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd); uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c index 3318a6bd2e1..d31f43b1fdf 100644 --- a/lib/libzfs/libzfs_crypto.c +++ b/lib/libzfs/libzfs_crypto.c @@ -740,14 +740,6 @@ zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props, pcrypt = ZIO_CRYPT_OFF; } - /* Check for encryption being explicitly truned off */ - if (crypt == 
ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) { - ret = EINVAL; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Invalid encryption value. Dataset must be encrypted.")); - goto out; - } - /* Get the inherited encryption property if we don't have it locally */ if (!local_crypt) crypt = pcrypt; @@ -849,10 +841,7 @@ int zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, char *parent_name, nvlist_t *props) { - int ret; char errbuf[1024]; - zfs_handle_t *pzhp = NULL; - uint64_t pcrypt, ocrypt; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "Encryption clone error")); @@ -865,40 +854,12 @@ zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)) || nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)) || nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS))) { - ret = EINVAL; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Encryption properties must inherit from origin dataset.")); - goto out; - } - - /* get a reference to parent dataset, should never be NULL */ - pzhp = make_dataset_handle(hdl, parent_name); - if (pzhp == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Failed to lookup parent.")); - return (ENOENT); + return (EINVAL); } - /* Lookup parent's crypt */ - pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); - ocrypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION); - - /* all children of encrypted parents must be encrypted */ - if (pcrypt != ZIO_CRYPT_OFF && ocrypt == ZIO_CRYPT_OFF) { - ret = EINVAL; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Cannot create unencrypted clone as a child " - "of encrypted parent.")); - goto out; - } - - zfs_close(pzhp); return (0); - -out: - if (pzhp != NULL) - zfs_close(pzhp); - return (ret); } typedef struct loadkeys_cbdata { diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 3be205f1f43..ee5a6412ead 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ 
-4632,16 +4632,9 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else if (errno == EACCES) { - if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == - ZIO_CRYPT_OFF) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot rename an unencrypted dataset to " - "be a decendent of an encrypted one")); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot move encryption child outside of " - "its encryption root")); - } + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot move encrypted child outside of " + "its encryption root")); (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index f69a46430bb..052b96b9b65 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -2827,7 +2827,7 @@ recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname, is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); - /* we don't need to do anything for unencrypted filesystems */ + /* we don't need to do anything for unencrypted datasets */ if (crypt == ZIO_CRYPT_OFF) { zfs_close(zhp); continue; @@ -4210,34 +4210,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, goto out; } - /* - * It is invalid to receive a properties stream that was - * unencrypted on the send side as a child of an encrypted - * parent. Technically there is nothing preventing this, but - * it would mean that the encryption=off property which is - * locally set on the send side would not be received correctly. - * We can infer encryption=off if the stream is not raw and - * properties were included since the send side will only ever - * send the encryption property in a raw nvlist header. 
This - * check will be avoided if the user specifically overrides - * the encryption property on the command line. - */ - if (!raw && rcvprops != NULL && - !nvlist_exists(cmdprops, - zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) { - uint64_t crypt; - - crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); - - if (crypt != ZIO_CRYPT_OFF) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "parent '%s' must not be encrypted to " - "receive unenecrypted property"), name); - err = zfs_error(hdl, EZFS_BADPROP, errbuf); - zfs_close(zhp); - goto out; - } - } zfs_close(zhp); newfs = B_TRUE; @@ -4274,6 +4246,24 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, &oxprops, &wkeydata, &wkeylen, errbuf)) != 0) goto out; + /* + * When sending with properties (zfs send -p), the encryption property + * is not included because it is a SETONCE property and therefore + * treated as read only. However, we are always able to determine its + * value because raw sends will include it in the DRR_BDEGIN payload + * and non-raw sends with properties are not allowed for encrypted + * datasets. Therefore, if this is a non-raw properties stream, we can + * infer that the value should be ZIO_CRYPT_OFF and manually add that + * to the received properties. 
+ */ + if (stream_wantsnewfs && !raw && rcvprops != NULL && + !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) { + if (oxprops == NULL) + oxprops = fnvlist_alloc(); + fnvlist_add_uint64(oxprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF); + } + err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable, raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags, diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index f95915b9e25..30436b188fc 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1348,13 +1348,6 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EINVAL)); } - error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir); - if (error != 0) { - dsl_dataset_rele(origin, FTAG); - dsl_dir_rele(pdd, FTAG); - return (error); - } - dsl_dataset_rele(origin, FTAG); dsl_dir_rele(pdd, FTAG); diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 65a031b42cc..3481feb21db 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -327,7 +327,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); - error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds); + error = dsl_dataset_hold(dp, buf, FTAG, &ds); if (error != 0) return (error); @@ -345,13 +345,13 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dmu_objset_create_crypt_check(ds->ds_dir, drba->drba_dcp, &will_encrypt); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (will_encrypt && (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } } @@ -364,25 +364,25 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 
ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } /* can't recv below anything but filesystems (eg. no ZVOLs) */ error = dmu_objset_from_ds(ds, &os); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (dmu_objset_type(os) != DMU_OST_ZFS) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); } @@ -392,31 +392,31 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_dataset_hold_flags(dp, drba->drba_origin, dsflags, FTAG, &origin); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (!origin->ds_is_snapshot) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } if (dsl_dataset_phys(origin)->ds_guid != fromguid && fromguid != 0) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); } if (origin->ds_dir->dd_crypto_obj != 0 && (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } dsl_dataset_rele_flags(origin, dsflags, FTAG); } - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); error = 0; } return (error); diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 21db8e51ffd..0c0ffaadd8f 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1610,15 +1610,8 @@ 
dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) int ret; uint64_t curr_rddobj, parent_rddobj; - if (dd->dd_crypto_obj == 0) { - /* children of encrypted parents must be encrypted */ - if (newparent->dd_crypto_obj != 0) { - ret = SET_ERROR(EACCES); - goto error; - } - + if (dd->dd_crypto_obj == 0) return (0); - } ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); if (ret != 0) @@ -1747,34 +1740,6 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, kmem_free(keylocation, ZAP_MAXVALUELEN); } -int -dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd) -{ - int ret; - uint64_t pcrypt, crypt; - - /* - * Check that we are not making an unencrypted child of an - * encrypted parent. - */ - ret = dsl_dir_get_crypt(parentdd, &pcrypt); - if (ret != 0) - return (ret); - - ret = dsl_dir_get_crypt(origindd, &crypt); - if (ret != 0) - return (ret); - - ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); - ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); - - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) - return (SET_ERROR(EINVAL)); - - return (0); -} - - int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt) @@ -1805,13 +1770,6 @@ dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); - /* - * We can't create an unencrypted child of an encrypted parent - * under any circumstances. 
- */ - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) - return (SET_ERROR(EINVAL)); - /* check for valid dcp with no encryption (inherited or local) */ if (crypt == ZIO_CRYPT_OFF) { /* Must not specify encryption params */ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh index 9d5ecab0dfe..7e5072f0d5f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh @@ -51,10 +51,10 @@ # yes unspec 0 1 no no keyformat specified # yes unspec 1 0 yes new encryption root, crypt inherited # yes unspec 1 1 yes new encryption root, crypt inherited -# yes off 0 0 no unencrypted child of encrypted parent -# yes off 0 1 no unencrypted child of encrypted parent -# yes off 1 0 no unencrypted child of encrypted parent -# yes off 1 1 no unencrypted child of encrypted parent +# yes off 0 0 yes unencrypted child of encrypted parent +# yes off 0 1 no keylocation given, but crypt off +# yes off 1 0 no keyformat given, but crypt off +# yes off 1 1 no keyformat given, but crypt off # yes on 0 0 yes inherited encryption, local crypt # yes on 0 1 no no keyformat specified for new key # yes on 1 0 yes new encryption root @@ -113,7 +113,9 @@ log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4" -log_mustnot zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS2/c5)" == "off" + log_mustnot zfs create -o encryption=off -o keylocation=prompt \ $TESTPOOL/$TESTFS2/c5 log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ @@ -122,13 +124,13 @@ log_mustnot zfs create -o encryption=off -o 
keyformat=passphrase \ -o keylocation=prompt $TESTPOOL/$TESTFS2/c5 log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "$TESTPOOL/$TESTFS2/c5" + "$TESTPOOL/$TESTFS2/c6" log_mustnot zfs create -o encryption=on -o keylocation=prompt \ - $TESTPOOL/$TESTFS2/c6 + $TESTPOOL/$TESTFS2/c7 log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6" + "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c7" log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7" + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c8" log_pass "ZFS creates datasets only if they have a valid combination of" \ "encryption properties set." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh index 57896c6fd30..f8e53f02c23 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh @@ -46,7 +46,7 @@ function cleanup log_onexit cleanup -log_assert "ZFS should receive to an encrypted child dataset" +log_assert "ZFS should receive encrypted filesystems into child dataset" typeset passphrase="password" typeset snap="$TESTPOOL/$TESTFS@snap" @@ -60,11 +60,13 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ log_note "Verifying ZFS will receive to an encrypted child" log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" -log_note "Verifying 'send -p' will not receive to an encrypted child" -log_mustnot eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" +log_note "Verifying 'send -p' will receive to an encrypted child" +log_must eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c2)" == "off" 
-log_note "Verifying 'send -R' will not receive to an encrypted child" -log_mustnot eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" +log_note "Verifying 'send -R' will receive to an encrypted child" +log_must eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c3)" == "off" log_note "Verifying ZFS will not receive to an encrypted child when the" \ "parent key is unloaded" @@ -72,4 +74,4 @@ log_must zfs unmount $TESTPOOL/$TESTFS1 log_must zfs unload-key $TESTPOOL/$TESTFS1 log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4" -log_pass "ZFS can receive to an encrypted child dataset" +log_pass "ZFS can receive encrypted filesystems into child dataset" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh index 400592aaca2..1b9c6e3c704 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh @@ -23,12 +23,13 @@ # # DESCRIPTION: -# 'zfs rename' should not rename an unencrypted dataset to a child +# 'zfs rename' should be able to move an unencrypted dataset to a child # of an encrypted dataset # # STRATEGY: # 1. Create an encrypted dataset -# 2. Attempt to rename the default dataset to a child of the encrypted dataset +# 2. Rename the default dataset to a child of the encrypted dataset +# 3. 
Confirm the child dataset doesn't have any encryption properties # verify_runnable "both" @@ -36,16 +37,17 @@ verify_runnable "both" function cleanup { datasetexists $TESTPOOL/$TESTFS2 && \ - log_must zfs destroy $TESTPOOL/$TESTFS2 + log_must zfs destroy -r $TESTPOOL/$TESTFS2 } log_onexit cleanup -log_assert "'zfs rename' should not rename an unencrypted dataset to a" \ +log_assert "'zfs rename' should allow renaming an unencrypted dataset to a" \ "child of an encrypted dataset" log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" -log_mustnot zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS +log_must zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS2/$TESTFS)" == "off" -log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \ +log_pass "'zfs rename' allows renaming an unencrypted dataset to a child" \ "of an encrypted dataset" From 9af524b0ee26c821cf412b796ef178e108c5cb10 Mon Sep 17 00:00:00 2001 From: Igor K Date: Fri, 21 Jun 2019 04:29:02 +0300 Subject: [PATCH 074/325] Update vdev_ops_t from illumos Align vdev_ops_t from illumos for better compatibility. 
Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Igor Kozhukhov Closes #8925 --- module/zfs/vdev_disk.c | 26 ++++++------- module/zfs/vdev_file.c | 52 ++++++++++++------------- module/zfs/vdev_indirect.c | 26 ++++++------- module/zfs/vdev_mirror.c | 78 +++++++++++++++++++------------------- module/zfs/vdev_missing.c | 52 ++++++++++++------------- module/zfs/vdev_raidz.c | 26 ++++++------- module/zfs/vdev_root.c | 26 ++++++------- 7 files changed, 143 insertions(+), 143 deletions(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 1419ae6ad54..1686ddfce77 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -935,19 +935,19 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) } vdev_ops_t vdev_disk_ops = { - vdev_disk_open, - vdev_disk_close, - vdev_default_asize, - vdev_disk_io_start, - vdev_disk_io_done, - NULL, - NULL, - vdev_disk_hold, - vdev_disk_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_DISK, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_disk_open, + .vdev_op_close = vdev_disk_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_disk_io_start, + .vdev_op_io_done = vdev_disk_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_disk_hold, + .vdev_op_rele = vdev_disk_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler, diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index c155057852a..b79017f3a61 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -277,19 +277,19 @@ vdev_file_io_done(zio_t *zio) } vdev_ops_t vdev_file_ops = { - vdev_file_open, - vdev_file_close, - vdev_default_asize, - vdev_file_io_start, - vdev_file_io_done, - NULL, - NULL, - vdev_file_hold, - vdev_file_rele, - 
NULL, - vdev_default_xlate, - VDEV_TYPE_FILE, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_file_open, + .vdev_op_close = vdev_file_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_file_io_start, + .vdev_op_io_done = vdev_file_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_file_hold, + .vdev_op_rele = vdev_file_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_FILE, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; void @@ -313,19 +313,19 @@ vdev_file_fini(void) #ifndef _KERNEL vdev_ops_t vdev_disk_ops = { - vdev_file_open, - vdev_file_close, - vdev_default_asize, - vdev_file_io_start, - vdev_file_io_done, - NULL, - NULL, - vdev_file_hold, - vdev_file_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_DISK, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_file_open, + .vdev_op_close = vdev_file_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_file_io_start, + .vdev_op_io_done = vdev_file_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_file_hold, + .vdev_op_rele = vdev_file_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; #endif diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 4d18e33c0ab..4539fa638ad 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1842,19 +1842,19 @@ vdev_indirect_io_done(zio_t *zio) } vdev_ops_t vdev_indirect_ops = { - vdev_indirect_open, - vdev_indirect_close, - vdev_default_asize, - vdev_indirect_io_start, - vdev_indirect_io_done, - NULL, - NULL, - NULL, - NULL, - vdev_indirect_remap, - NULL, - VDEV_TYPE_INDIRECT, /* name of this vdev type */ - B_FALSE /* leaf vdev */ + .vdev_op_open = 
vdev_indirect_open, + .vdev_op_close = vdev_indirect_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_indirect_io_start, + .vdev_op_io_done = vdev_indirect_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = vdev_indirect_remap, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_INDIRECT, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* leaf vdev */ }; #if defined(_KERNEL) diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 59cc2dcdd2c..23ff75bfc96 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -786,51 +786,51 @@ vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) } vdev_ops_t vdev_mirror_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_MIRROR, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_MIRROR, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_replacing_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_REPLACING, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + 
.vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_REPLACING, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_spare_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_SPARE, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_SPARE, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; #if defined(_KERNEL) diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c index d85993bff05..205b23eba7f 100644 --- a/module/zfs/vdev_missing.c +++ b/module/zfs/vdev_missing.c @@ -80,33 +80,33 @@ vdev_missing_io_done(zio_t *zio) } vdev_ops_t vdev_missing_ops = { - vdev_missing_open, - vdev_missing_close, - vdev_default_asize, - vdev_missing_io_start, - vdev_missing_io_done, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_MISSING, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_missing_open, + .vdev_op_close = vdev_missing_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_missing_io_start, + .vdev_op_io_done = vdev_missing_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + 
.vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_MISSING, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; vdev_ops_t vdev_hole_ops = { - vdev_missing_open, - vdev_missing_close, - vdev_default_asize, - vdev_missing_io_start, - vdev_missing_io_done, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_HOLE, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_missing_open, + .vdev_op_close = vdev_missing_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_missing_io_start, + .vdev_op_io_done = vdev_missing_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_HOLE, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 215cd1c1206..327b186713f 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2403,17 +2403,17 @@ vdev_raidz_xlate(vdev_t *cvd, const range_seg_t *in, range_seg_t *res) } vdev_ops_t vdev_raidz_ops = { - vdev_raidz_open, - vdev_raidz_close, - vdev_raidz_asize, - vdev_raidz_io_start, - vdev_raidz_io_done, - vdev_raidz_state_change, - vdev_raidz_need_resilver, - NULL, - NULL, - NULL, - vdev_raidz_xlate, - VDEV_TYPE_RAIDZ, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_raidz_open, + .vdev_op_close = vdev_raidz_close, + .vdev_op_asize = vdev_raidz_asize, + .vdev_op_io_start = vdev_raidz_io_start, + .vdev_op_io_done = vdev_raidz_io_done, + .vdev_op_state_change = vdev_raidz_state_change, + .vdev_op_need_resilver = vdev_raidz_need_resilver, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_raidz_xlate, + .vdev_op_type = VDEV_TYPE_RAIDZ, /* name of this vdev type */ + .vdev_op_leaf 
= B_FALSE /* not a leaf vdev */ }; diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index e40b7ce8e4e..7170f701360 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -140,17 +140,17 @@ vdev_root_state_change(vdev_t *vd, int faulted, int degraded) } vdev_ops_t vdev_root_ops = { - vdev_root_open, - vdev_root_close, - vdev_default_asize, - NULL, /* io_start - not applicable to the root */ - NULL, /* io_done - not applicable to the root */ - vdev_root_state_change, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_ROOT, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_root_open, + .vdev_op_close = vdev_root_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = NULL, /* not applicable to the root */ + .vdev_op_io_done = NULL, /* not applicable to the root */ + .vdev_op_state_change = vdev_root_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_ROOT, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; From 3c2a42fd254917db78484c428bd317ec7189c968 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 20 Jun 2019 18:30:40 -0700 Subject: [PATCH 075/325] dedup=verify doesn't clear the blkptr's dedup flag The logic to handle strong checksum collisions where the data doesn't match is incorrect. It is not clearing the dedup bit of the blkptr, which can cause a panic later in zio_ddt_free() due to the dedup table not matching what is in the blkptr. 
Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens External-issue: DLPX-48097 Closes #8936 --- module/zfs/zio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 5bfff37eb3b..f9503bd3ff8 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3192,7 +3192,9 @@ zio_ddt_write(zio_t *zio) BP_ZERO(bp); } else { zp->zp_dedup = B_FALSE; + BP_SET_DEDUP(bp, B_FALSE); } + ASSERT(!BP_GET_DEDUP(bp)); zio->io_pipeline = ZIO_WRITE_PIPELINE; ddt_exit(ddt); return (zio); From ab24c9cd4cbba2c4d5cb68f3e1e08dcf2275dc34 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Fri, 21 Jun 2019 10:31:53 +0900 Subject: [PATCH 076/325] Prevent pointer to an out-of-scope local variable `show_str` could be a pointer to a local variable in stack which is out-of-scope by the time `return (snprintf(buf, buflen, "%s\n", show_str));` is called. Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #8924 Closes #8940 --- module/zfs/zfs_sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/module/zfs/zfs_sysfs.c b/module/zfs/zfs_sysfs.c index 2f5bea9aa99..bb7f3b69a66 100644 --- a/module/zfs/zfs_sysfs.c +++ b/module/zfs/zfs_sysfs.c @@ -264,6 +264,7 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, char *buf, size_t buflen) { const char *show_str; + char number[32]; /* For dataset properties list the dataset types that apply */ if (strcmp(attr_name, "datasets") == 0 && @@ -291,8 +292,6 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, } else if (strcmp(attr_name, "values") == 0) { show_str = property->pd_values ? 
property->pd_values : ""; } else if (strcmp(attr_name, "default") == 0) { - char number[32]; - switch (property->pd_proptype) { case PROP_TYPE_NUMBER: (void) snprintf(number, sizeof (number), "%llu", From 1fd28bd8d4e102a4ce5e4910427f612c7cf73e68 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 21 Jun 2019 09:40:56 -0700 Subject: [PATCH 077/325] Add SCSI_PASSTHROUGH to zvols to enable UNMAP support When exporting ZVOLs as SCSI LUNs, by default Windows will not issue them UNMAP commands. This reduces storage efficiency in many cases. We add the SCSI_PASSTHROUGH flag to the zvol's device queue, which lets the SCSI target logic know that it can handle SCSI commands. Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: John Gallagher Signed-off-by: Paul Dagnelie Closes #8933 --- module/zfs/zvol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index c29f65f676b..7c7500dbaaf 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1876,6 +1876,10 @@ zvol_create_minor_impl(const char *name) #ifdef QUEUE_FLAG_ADD_RANDOM blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue); #endif + /* This flag was introduced in kernel version 4.12. 
*/ +#ifdef QUEUE_FLAG_SCSI_PASSTHROUGH + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_queue); +#endif if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) From 7a5f4656ce76dbb2c7f3c6810f670a713da48a9e Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Sat, 22 Jun 2019 16:32:26 -0700 Subject: [PATCH 078/325] Fix comments on zfs_bookmark_phys Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Reviewed-by: George Melikov Signed-off-by: Paul Dagnelie Closes #8945 --- include/sys/dsl_bookmark.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h index 3cdad744140..ea7d70cf323 100644 --- a/include/sys/dsl_bookmark.h +++ b/include/sys/dsl_bookmark.h @@ -37,9 +37,11 @@ typedef struct zfs_bookmark_phys { uint64_t zbm_creation_txg; /* birth transaction group */ uint64_t zbm_creation_time; /* bookmark creation time */ - /* the following fields are reserved for redacted send / recv */ + /* fields used for redacted send / recv */ uint64_t zbm_redaction_obj; /* redaction list object */ uint64_t zbm_flags; /* ZBM_FLAG_* */ + + /* fields used for bookmark written size */ uint64_t zbm_referenced_bytes_refd; uint64_t zbm_compressed_bytes_refd; uint64_t zbm_uncompressed_bytes_refd; From d053481523b369b7c00f5fd1c1b1ae54876b8f69 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Sat, 22 Jun 2019 19:33:44 -0400 Subject: [PATCH 079/325] zstreamdump: add per-record-type counters and an overhead counter Count the bytes of payload for each replication record type Count the bytes of overhead (replication records themselves) Include these counters in the output summary at the end of the run. 
Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Signed-off-by: Allan Jude Sponsored-By: Klara Systems and Catalogic Closes #8432 --- cmd/zstreamdump/zstreamdump.c | 63 ++++++++++++------- .../tests/functional/rsend/rsend.kshlib | 2 +- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index a162eceda58..a65b4cef3d3 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -53,7 +53,6 @@ */ #define DUMP_GROUPING 4 -uint64_t total_write_size = 0; uint64_t total_stream_len = 0; FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; @@ -219,6 +218,9 @@ main(int argc, char *argv[]) { char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + uint64_t total_payload_size = 0; + uint64_t total_overhead_size = 0; + uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 }; char salt[ZIO_DATA_SALT_LEN * 2 + 1]; char iv[ZIO_DATA_IV_LEN * 2 + 1]; char mac[ZIO_DATA_MAC_LEN * 2 + 1]; @@ -336,7 +338,9 @@ main(int argc, char *argv[]) } drr_record_count[drr->drr_type]++; + total_overhead_size += sizeof (*drr); total_records++; + payload_size = 0; switch (drr->drr_type) { case DRR_BEGIN: @@ -390,6 +394,7 @@ main(int argc, char *argv[]) nvlist_print(stdout, nv); nvlist_free(nv); } + payload_size = sz; } break; @@ -554,7 +559,6 @@ main(int argc, char *argv[]) if (dump) { print_block(buf, payload_size); } - total_write_size += payload_size; break; case DRR_WRITE_BYREF: @@ -683,6 +687,7 @@ main(int argc, char *argv[]) print_block(buf, P2ROUNDUP(drrwe->drr_psize, 8)); } + payload_size = P2ROUNDUP(drrwe->drr_psize, 8); break; case DRR_OBJECT_RANGE: if (do_byteswap) { @@ -723,6 +728,8 @@ main(int argc, char *argv[]) (longlong_t)drrc->drr_checksum.zc_word[3]); } pcksum = zc; + drr_byte_count[drr->drr_type] += payload_size; + total_payload_size += payload_size; } free(buf); fletcher_4_fini(); @@ -730,28 +737,40 @@ main(int argc, char *argv[]) /* Print final 
summary */ (void) printf("SUMMARY:\n"); - (void) printf("\tTotal DRR_BEGIN records = %lld\n", - (u_longlong_t)drr_record_count[DRR_BEGIN]); - (void) printf("\tTotal DRR_END records = %lld\n", - (u_longlong_t)drr_record_count[DRR_END]); - (void) printf("\tTotal DRR_OBJECT records = %lld\n", - (u_longlong_t)drr_record_count[DRR_OBJECT]); - (void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); - (void) printf("\tTotal DRR_WRITE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE]); - (void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]); - (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]); - (void) printf("\tTotal DRR_FREE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREE]); - (void) printf("\tTotal DRR_SPILL records = %lld\n", - (u_longlong_t)drr_record_count[DRR_SPILL]); + (void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_BEGIN], + (u_longlong_t)drr_byte_count[DRR_BEGIN]); + (void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_END], + (u_longlong_t)drr_byte_count[DRR_END]); + (void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_OBJECT], + (u_longlong_t)drr_byte_count[DRR_OBJECT]); + (void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREEOBJECTS], + (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]); + (void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE], + (u_longlong_t)drr_byte_count[DRR_WRITE]); + (void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE_BYREF], + (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]); + (void) printf("\tTotal DRR_WRITE_EMBEDDED records 
= %lld (%llu " + "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED], + (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]); + (void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREE], + (u_longlong_t)drr_byte_count[DRR_FREE]); + (void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_SPILL], + (u_longlong_t)drr_byte_count[DRR_SPILL]); (void) printf("\tTotal records = %lld\n", (u_longlong_t)total_records); - (void) printf("\tTotal write size = %lld (0x%llx)\n", - (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); + (void) printf("\tTotal payload size = %lld (0x%llx)\n", + (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size); + (void) printf("\tTotal header overhead = %lld (0x%llx)\n", + (u_longlong_t)total_overhead_size, + (u_longlong_t)total_overhead_size); (void) printf("\tTotal stream length = %lld (0x%llx)\n", (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); return (0); diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 521a1c7eb63..8737ae55abf 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -754,7 +754,7 @@ function verify_stream_size datasetexists $ds || log_fail "No such dataset: $ds" typeset stream_size=$(cat $stream | zstreamdump | sed -n \ - 's/ Total write size = \(.*\) (0x.*)/\1/p') + 's/ Total payload size = \(.*\) (0x.*)/\1/p') typeset inc_size=0 if [[ -n $inc_src ]]; then From 95fcb04215015950b3388ba0a6edad8e1b463415 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Sat, 22 Jun 2019 16:41:21 -0700 Subject: [PATCH 080/325] Let zfs mount all tolerate in-progress mounts The zfs-mount service can unexpectedly fail to start when zfs encounters a mount that is in progress. 
This service uses zfs mount -a, which has a window between the time it checks if the dataset was mounted and when the actual mount (via mount.zfs binary) occurs. The reason for the racing mounts is that both zfs-mount.target and zfs-share.target are allowed to execute concurrently after the import. This is more of an issue with the relatively recent addition of parallel mounting, and we should consider serializing the mount and share targets. Reviewed-by: Brian Behlendorf Reviewed by: John Kennedy Reviewed-by: Allan Jude Signed-off-by: Don Brady Closes #8881 --- cmd/zfs/zfs_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 214a437c5dd..07421605522 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -6446,8 +6446,25 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, return (1); } - if (zfs_mount(zhp, options, flags) != 0) + if (zfs_mount(zhp, options, flags) != 0) { + /* + * Check if a mount sneaked in after we checked + */ + if (!explicit && + libzfs_errno(g_zfs) == EZFS_MOUNTFAILED) { + usleep(10 * MILLISEC); + libzfs_mnttab_cache(g_zfs, B_FALSE); + + if (zfs_is_mounted(zhp, NULL)) { + (void) fprintf(stderr, gettext( + "Ignoring previous 'already " + "mounted' error for '%s'\n"), + zfs_get_name(zhp)); + return (0); + } + } return (1); + } break; } From 2d88230d97d9f9f4f3b89d1081eeab86fe3d9373 Mon Sep 17 00:00:00 2001 From: Harry Mallon <1816667+hjmallon@users.noreply.github.com> Date: Sun, 23 Jun 2019 00:43:11 +0100 Subject: [PATCH 081/325] Add libnvpair to libzfs pkg-config Functions such as `fnvlist_lookup_nvlist` need libnvpair to be linked. Default pkg-config file did not contain it. 
Reviewed-by: Brian Behlendorf Signed-off-by: Harry Mallon Closes #8919 --- lib/libzfs/libzfs.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libzfs/libzfs.pc.in b/lib/libzfs/libzfs.pc.in index 0e83f7a64be..1122401a6eb 100644 --- a/lib/libzfs/libzfs.pc.in +++ b/lib/libzfs/libzfs.pc.in @@ -9,4 +9,4 @@ Version: @VERSION@ URL: http://zfsonlinux.org Requires: libzfs_core Cflags: -I${includedir}/libzfs -I${includedir}/libspl -Libs: -L${libdir} -lzfs +Libs: -L${libdir} -lzfs -lnvpair From be4a282a8ddc6bf42ec1ba9c3b99d06052f1d625 Mon Sep 17 00:00:00 2001 From: gordan-bobic Date: Sun, 23 Jun 2019 00:47:19 +0100 Subject: [PATCH 082/325] Remove arch and relax version dependency Remove arch and relax version dependency for zfs-dracut package. Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Gordan Bobic Issue #8913 Closes #8914 --- rpm/generic/zfs.spec.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 9faa3ba771a..0b16cd0e886 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -255,7 +255,8 @@ validating the file system. %package dracut Summary: Dracut module Group: System Environment/Kernel -Requires: %{name}%{?_isa} = %{version}-%{release} +BuildArch: noarch +Requires: %{name} >= %{version} Requires: dracut Requires: /usr/bin/awk Requires: grep From 7d64595c251682f4a38809ecd44e81b4d1af8b74 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Sat, 22 Jun 2019 16:48:54 -0700 Subject: [PATCH 083/325] dn_struct_rwlock can not be held in dmu_tx_try_assign() The thread calling dmu_tx_try_assign() can't hold the dn_struct_rwlock while assigning the tx, because this can lead to deadlock. Specifically, if this dnode is already assigned to an earlier txg, this thread may need to wait for that txg to sync (the ERESTART case below). 
The other thread that has assigned this dnode to an earlier txg prevents this txg from syncing until its tx can complete (calling dmu_tx_commit()), but it may need to acquire the dn_struct_rwlock to do so (e.g. via dmu_buf_hold*()). This commit adds an assertion to dmu_tx_try_assign() to ensure that this deadlock is not inadvertently introduced. Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #8929 --- module/zfs/dmu_tx.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index cbadcc86fc6..7d65e842ff0 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -925,6 +925,25 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) txh = list_next(&tx->tx_holds, txh)) { dnode_t *dn = txh->txh_dnode; if (dn != NULL) { + /* + * This thread can't hold the dn_struct_rwlock + * while assigning the tx, because this can lead to + * deadlock. Specifically, if this dnode is already + * assigned to an earlier txg, this thread may need + * to wait for that txg to sync (the ERESTART case + * below). The other thread that has assigned this + * dnode to an earlier txg prevents this txg from + * syncing until its tx can complete (calling + * dmu_tx_commit()), but it may need to acquire the + * dn_struct_rwlock to do so (e.g. via + * dmu_buf_hold*()). + * + * Note that this thread can't hold the lock for + * read either, but the rwlock doesn't record + * enough information to make that assertion. + */ + ASSERT(!RW_WRITE_HELD(&dn->dn_struct_rwlock)); + mutex_enter(&dn->dn_mtx); if (dn->dn_assigned_txg == tx->tx_txg - 1) { mutex_exit(&dn->dn_mtx); From cc7fe8a59967092a9b42355794a1859feb30548f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 24 Jun 2019 09:32:47 -0700 Subject: [PATCH 084/325] Fix out-of-tree build failures Resolve the incorrect use of srcdir and builddir references for various files in the build system. 
These have crept in over time and went unnoticed because when building in the top level directory srcdir and builddir are identical. With this change it's again possible to build in a subdirectory. $ mkdir obj $ cd obj $ ../configure $ make Reviewed-by: loli10K Reviewed-by: Tony Hutter Reviewed-by: Don Brady Signed-off-by: Brian Behlendorf Closes #8921 Closes #8943 --- Makefile.am | 3 +- cmd/zed/Makefile.am | 57 +------------------ cmd/zed/zed.d/Makefile.am | 57 +++++++++++++++++++ configure.ac | 1 + contrib/initramfs/Makefile.am | 21 ++++--- contrib/pyzfs/Makefile.am | 2 +- module/Makefile.in | 5 +- scripts/Makefile.am | 5 +- tests/runfiles/Makefile.am | 5 +- .../tests/functional/checksum/Makefile.am | 2 +- .../tests/functional/hkdf/Makefile.am | 2 +- 11 files changed, 88 insertions(+), 72 deletions(-) create mode 100644 cmd/zed/zed.d/Makefile.am diff --git a/Makefile.am b/Makefile.am index 1ec2514922a..9afe2295410 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,7 +52,8 @@ distclean-local:: -type f -print | xargs $(RM) all-local: - -${top_srcdir}/scripts/zfs-tests.sh -c + -[ -x ${top_builddir}/scripts/zfs-tests.sh ] && \ + ${top_builddir}/scripts/zfs-tests.sh -c dist-hook: gitrev cp ${top_srcdir}/include/zfs_gitrev.h $(distdir)/include; \ diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 9c11315f2a5..fb479f9b5c7 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -1,12 +1,11 @@ +SUBDIRS = zed.d + include $(top_srcdir)/config/Rules.am DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include -EXTRA_DIST = zed.d/README \ - zed.d/history_event-zfs-list-cacher.sh.in - sbin_PROGRAMS = zed ZED_SRC = \ @@ -47,55 +46,3 @@ zed_LDADD = \ zed_LDADD += -lrt zed_LDFLAGS = -pthread - -zedconfdir = $(sysconfdir)/zfs/zed.d - -dist_zedconf_DATA = \ - zed.d/zed-functions.sh \ - zed.d/zed.rc - -zedexecdir = $(zfsexecdir)/zed.d - -dist_zedexec_SCRIPTS = \ - zed.d/all-debug.sh \ - zed.d/all-syslog.sh \ - zed.d/data-notify.sh \ - 
zed.d/generic-notify.sh \ - zed.d/resilver_finish-notify.sh \ - zed.d/scrub_finish-notify.sh \ - zed.d/statechange-led.sh \ - zed.d/statechange-notify.sh \ - zed.d/vdev_clear-led.sh \ - zed.d/vdev_attach-led.sh \ - zed.d/pool_import-led.sh \ - zed.d/resilver_finish-start-scrub.sh - -nodist_zedexec_SCRIPTS = zed.d/history_event-zfs-list-cacher.sh - -$(nodist_zedexec_SCRIPTS): %: %.in - -$(SED) -e 's,@bindir\@,$(bindir),g' \ - -e 's,@runstatedir\@,$(runstatedir),g' \ - -e 's,@sbindir\@,$(sbindir),g' \ - -e 's,@sysconfdir\@,$(sysconfdir),g' \ - $< >'$@' - -zedconfdefaults = \ - all-syslog.sh \ - data-notify.sh \ - resilver_finish-notify.sh \ - scrub_finish-notify.sh \ - statechange-led.sh \ - statechange-notify.sh \ - vdev_clear-led.sh \ - vdev_attach-led.sh \ - pool_import-led.sh \ - resilver_finish-start-scrub.sh - -install-data-hook: - $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" - for f in $(zedconfdefaults); do \ - test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ - -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ - ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ - done - chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am new file mode 100644 index 00000000000..716db2b2f21 --- /dev/null +++ b/cmd/zed/zed.d/Makefile.am @@ -0,0 +1,57 @@ +include $(top_srcdir)/config/Rules.am + +EXTRA_DIST = \ + README \ + history_event-zfs-list-cacher.sh.in + +zedconfdir = $(sysconfdir)/zfs/zed.d + +dist_zedconf_DATA = \ + zed-functions.sh \ + zed.rc + +zedexecdir = $(zfsexecdir)/zed.d + +dist_zedexec_SCRIPTS = \ + all-debug.sh \ + all-syslog.sh \ + data-notify.sh \ + generic-notify.sh \ + resilver_finish-notify.sh \ + scrub_finish-notify.sh \ + statechange-led.sh \ + statechange-notify.sh \ + vdev_clear-led.sh \ + vdev_attach-led.sh \ + pool_import-led.sh \ + resilver_finish-start-scrub.sh + +nodist_zedexec_SCRIPTS = history_event-zfs-list-cacher.sh + +$(nodist_zedexec_SCRIPTS): %: %.in + -$(SED) -e 's,@bindir\@,$(bindir),g' \ + -e 
's,@runstatedir\@,$(runstatedir),g' \ + -e 's,@sbindir\@,$(sbindir),g' \ + -e 's,@sysconfdir\@,$(sysconfdir),g' \ + $< >'$@' + +zedconfdefaults = \ + all-syslog.sh \ + data-notify.sh \ + resilver_finish-notify.sh \ + scrub_finish-notify.sh \ + statechange-led.sh \ + statechange-notify.sh \ + vdev_clear-led.sh \ + vdev_attach-led.sh \ + pool_import-led.sh \ + resilver_finish-start-scrub.sh + +install-data-hook: + $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" + for f in $(zedconfdefaults); do \ + test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ + -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ + ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ + done + chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/configure.ac b/configure.ac index db614084e37..ea2e355c70b 100644 --- a/configure.ac +++ b/configure.ac @@ -120,6 +120,7 @@ AC_CONFIG_FILES([ cmd/dbufstat/Makefile cmd/arc_summary/Makefile cmd/zed/Makefile + cmd/zed/zed.d/Makefile cmd/raidz_test/Makefile cmd/zgenhostid/Makefile contrib/Makefile diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index 87ec7a86f5a..9f912d94664 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -11,13 +11,18 @@ EXTRA_DIST = \ $(top_srcdir)/contrib/initramfs/README.initramfs.markdown install-initrdSCRIPTS: $(EXTRA_DIST) - for d in conf.d conf-hooks.d hooks scripts scripts/local-top; do \ - $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ - cp $(top_srcdir)/contrib/initramfs/$$d/zfs \ - $(DESTDIR)$(initrddir)/$$d/; \ + for d in conf.d conf-hooks.d scripts/local-top; do \ + $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ + cp $(top_srcdir)/contrib/initramfs/$$d/zfs \ + $(DESTDIR)$(initrddir)/$$d/; \ done - if [ -f etc/init.d/zfs ]; then \ - $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ - cp $(top_srcdir)/etc/init.d/zfs \ - $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ + for d in hooks scripts; do \ + $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ + cp $(top_builddir)/contrib/initramfs/$$d/zfs \ + 
$(DESTDIR)$(initrddir)/$$d/; \ + done + if [ -f $(top_builddir)/etc/init.d/zfs ]; then \ + $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ + cp $(top_builddir)/etc/init.d/zfs \ + $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ fi diff --git a/contrib/pyzfs/Makefile.am b/contrib/pyzfs/Makefile.am index 1549bf23793..fa1bb32ce2e 100644 --- a/contrib/pyzfs/Makefile.am +++ b/contrib/pyzfs/Makefile.am @@ -24,7 +24,7 @@ all-local: # files are later created by manually loading the Python modules. # install-exec-local: - $(PYTHON) $(srcdir)/setup.py install \ + $(PYTHON) $(builddir)/setup.py install \ --prefix $(prefix) \ --root $(DESTDIR)/ \ --install-lib $(pythonsitedir) \ diff --git a/module/Makefile.in b/module/Makefile.in index 935bd266306..eca7691aedb 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -66,8 +66,9 @@ modules_uninstall: distdir: list='$(subdir-m)'; for subdir in $$list; do \ - (cd @top_srcdir@/module && find $$subdir -name '*.c' -o -name '*.h' -o -name '*.S' |\ - xargs cp --parents -t $$distdir); \ + (cd @top_srcdir@/module && find $$subdir \ + -name '*.c' -o -name '*.h' -o -name '*.S' | \ + xargs cp --parents -t @abs_top_builddir@/module/$$distdir); \ done distclean maintainer-clean: clean diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 11e963c527a..d275a41c4e0 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -60,7 +60,7 @@ all-local: -e '\|^export SBIN_DIR=|s|$$|@abs_top_builddir@/bin|' \ -e '\|^export ZTS_DIR=|s|$$|@abs_top_srcdir@/tests|' \ -e '\|^export SCRIPT_DIR=|s|$$|@abs_top_srcdir@/scripts|' \ - common.sh.in >common.sh + $(abs_top_srcdir)/scripts/common.sh.in >common.sh -echo "$$EXTRA_ENVIRONMENT" >>common.sh clean-local: @@ -71,4 +71,5 @@ install-data-hook: -e '\|^export SBIN_DIR=|s|$$|@sbindir@|' \ -e '\|^export ZTS_DIR=|s|$$|@datadir@/@PACKAGE@|' \ -e '\|^export SCRIPT_DIR=|s|$$|@datadir@/@PACKAGE@|' \ - common.sh.in >$(DESTDIR)$(datadir)/@PACKAGE@/common.sh + $(abs_top_srcdir)/scripts/common.sh.in \ + 
>$(DESTDIR)$(datadir)/@PACKAGE@/common.sh diff --git a/tests/runfiles/Makefile.am b/tests/runfiles/Makefile.am index 138d905a572..4625806ff8b 100644 --- a/tests/runfiles/Makefile.am +++ b/tests/runfiles/Makefile.am @@ -1,2 +1,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/runfiles -dist_pkgdata_DATA = *.run +dist_pkgdata_DATA = \ + linux.run \ + longevity.run \ + perf-regression.run diff --git a/tests/zfs-tests/tests/functional/checksum/Makefile.am b/tests/zfs-tests/tests/functional/checksum/Makefile.am index f72546b2259..905d991ed75 100644 --- a/tests/zfs-tests/tests/functional/checksum/Makefile.am +++ b/tests/zfs-tests/tests/functional/checksum/Makefile.am @@ -1,7 +1,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include -LDADD = $(top_srcdir)/lib/libicp/libicp.la +LDADD = $(top_builddir)/lib/libicp/libicp.la AUTOMAKE_OPTIONS = subdir-objects diff --git a/tests/zfs-tests/tests/functional/hkdf/Makefile.am b/tests/zfs-tests/tests/functional/hkdf/Makefile.am index 3ac26ed21c1..b54e353cd96 100644 --- a/tests/zfs-tests/tests/functional/hkdf/Makefile.am +++ b/tests/zfs-tests/tests/functional/hkdf/Makefile.am @@ -2,7 +2,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include AM_CPPFLAGS += -I$(top_srcdir)/lib/libspl/include -LDADD = $(top_srcdir)/lib/libzpool/libzpool.la +LDADD = $(top_builddir)/lib/libzpool/libzpool.la AUTOMAKE_OPTIONS = subdir-objects From bfe5f029cfb0ae5e246898baf928c944c220ff46 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Mon, 24 Jun 2019 19:42:52 -0400 Subject: [PATCH 085/325] Fix error message on promoting encrypted dataset This patch corrects the error message reported when attempting to promote a dataset outside of its encryption root. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8905 Closes #8935 --- lib/libzfs/libzfs_dataset.c | 10 ++++++++++ module/zfs/dsl_crypt.c | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index ee5a6412ead..0d0194e6845 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -4117,6 +4117,16 @@ zfs_promote(zfs_handle_t *zhp) if (ret != 0) { switch (ret) { + case EACCES: + /* + * Promoting encrypted dataset outside its + * encryption root. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot promote dataset outside its " + "encryption root")); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + case EEXIST: /* There is a conflicting snapshot name. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 0c0ffaadd8f..568fe7aa326 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1676,11 +1676,15 @@ dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) * Check that the parent of the target has the same encryption root. */ ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj); - if (ret != 0) + if (ret == ENOENT) + return (SET_ERROR(EACCES)); + else if (ret != 0) return (ret); ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj); - if (ret != 0) + if (ret == ENOENT) + return (SET_ERROR(EACCES)); + else if (ret != 0) return (ret); if (op_rddobj != tp_rddobj) From 05006f125ccd97851d5f673483fb4ba606bdf0d3 Mon Sep 17 00:00:00 2001 From: Igor K Date: Tue, 25 Jun 2019 03:58:12 +0300 Subject: [PATCH 086/325] -Y option for zdb is valid The -Y option was added for ztest to test split block reconstruction. 
Reviewed-by: Brian Behlendorf Reviewed-by: Richard Elling Signed-off-by: Igor Kozhukhov Closes #8926 --- tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh index a5f827b5642..e69779bd4b4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh @@ -59,7 +59,7 @@ set -A args "create" "add" "destroy" "import fakepool" \ "-a" "-f" "-g" "-h" "-j" "-m" "-n" "-o" "-p" \ "-p /tmp" "-r" "-t" "-w" "-x" "-y" "-z" \ "-D" "-E" "-G" "-H" "-I" "-J" "-K" "-M" \ - "-N" "-Q" "-R" "-S" "-T" "-W" "-Y" "-Z" + "-N" "-Q" "-R" "-S" "-T" "-W" "-Z" log_assert "Execute zdb using invalid parameters." From 04d4df89f4526eecd66fa1c380dba5ee3aff261c Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 25 Jun 2019 15:03:38 -0400 Subject: [PATCH 087/325] Avoid extra taskq_dispatch() calls by DMU DMU sync code calls taskq_dispatch() for each sublist of os_dirty_dnodes and os_synced_dnodes. Since the number of sublists by default is equal to number of CPUs, it will dispatch equal, potentially large, number of tasks, waking up many CPUs to handle them, even if only one or few of sublists actually have any work to do. This change adds check for empty sublists to avoid this. 
Reviewed by: Sean Eric Fagan Reviewed by: Matt Ahrens Reviewed by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #8909 --- include/sys/multilist.h | 2 ++ module/zfs/dmu_objset.c | 19 ++++++++++++++----- module/zfs/multilist.c | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 43954068597..0c7b4075d9a 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -89,6 +89,8 @@ void multilist_sublist_insert_head(multilist_sublist_t *, void *); void multilist_sublist_insert_tail(multilist_sublist_t *, void *); void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj); void multilist_sublist_remove(multilist_sublist_t *, void *); +int multilist_sublist_is_empty(multilist_sublist_t *); +int multilist_sublist_is_empty_idx(multilist_t *, unsigned int); void *multilist_sublist_head(multilist_sublist_t *); void *multilist_sublist_tail(multilist_sublist_t *); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 30436b188fc..29ed45a55dc 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1692,6 +1692,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) zio_t *zio; list_t *list; dbuf_dirty_record_t *dr; + int num_sublists; + multilist_t *ml; blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); *blkptr_copy = *os->os_rootbp; @@ -1780,10 +1782,13 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) } } - for (int i = 0; - i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) { + ml = os->os_dirty_dnodes[txgoff]; + num_sublists = multilist_get_num_sublists(ml); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(ml, i)) + continue; sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP); - sda->sda_list = os->os_dirty_dnodes[txgoff]; + sda->sda_list = ml; sda->sda_sublist_idx = i; sda->sda_tx = tx; (void) 
taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq, @@ -2086,6 +2091,8 @@ userquota_updates_task(void *arg) void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) { + int num_sublists; + if (!dmu_objset_userused_enabled(os)) return; @@ -2118,8 +2125,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); } - for (int i = 0; - i < multilist_get_num_sublists(os->os_synced_dnodes); i++) { + num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i)) + continue; userquota_updates_arg_t *uua = kmem_alloc(sizeof (*uua), KM_SLEEP); uua->uua_os = os; diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index 2a594c56cbd..b74ee0f0670 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -363,6 +363,28 @@ multilist_sublist_remove(multilist_sublist_t *mls, void *obj) list_remove(&mls->mls_list, obj); } +int +multilist_sublist_is_empty(multilist_sublist_t *mls) +{ + ASSERT(MUTEX_HELD(&mls->mls_lock)); + return (list_is_empty(&mls->mls_list)); +} + +int +multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) +{ + multilist_sublist_t *mls; + int empty; + + ASSERT3U(sublist_idx, <, ml->ml_num_sublists); + mls = &ml->ml_sublists[sublist_idx]; + ASSERT(!MUTEX_HELD(&mls->mls_lock)); + mutex_enter(&mls->mls_lock); + empty = list_is_empty(&mls->mls_list); + mutex_exit(&mls->mls_lock); + return (empty); +} + void * multilist_sublist_head(multilist_sublist_t *mls) { From 7d2489cfad1b04c1b22292d0a9a58f85195ce00c Mon Sep 17 00:00:00 2001 From: George Wilson Date: Fri, 28 Jun 2019 15:40:24 -0400 Subject: [PATCH 088/325] nopwrites on dmu_sync-ed blocks can result in a panic After device removal, performing nopwrites on a dmu_sync-ed block will result in a panic. This panic can show up in two ways: 1. an attempt to issue an IOCTL in vdev_indirect_io_start() 2. 
a failed comparison of zio->io_bp and zio->io_bp_orig in zio_done() To resolve both of these panics, nopwrites of blocks on indirect vdevs should be ignored and new allocations should be performed on concrete vdevs. Reviewed-by: Igor Kozhukhov Reviewed-by: Pavel Zakharov Reviewed-by: Brian Behlendorf Reviewed-by: Don Brady Signed-off-by: George Wilson Closes #8957 --- module/zfs/zio.c | 14 +++ tests/runfiles/linux.run | 2 +- .../tests/functional/removal/Makefile.am | 2 +- .../functional/removal/removal_nopwrite.ksh | 87 +++++++++++++++++++ 4 files changed, 103 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh diff --git a/module/zfs/zio.c b/module/zfs/zio.c index f9503bd3ff8..94eaa5888a9 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -2862,6 +2862,20 @@ zio_nop_write(zio_t *zio) ASSERT(bcmp(&bp->blk_prop, &bp_orig->blk_prop, sizeof (uint64_t)) == 0); + /* + * If we're overwriting a block that is currently on an + * indirect vdev, then ignore the nopwrite request and + * allow a new block to be allocated on a concrete vdev. 
+ */ + spa_config_enter(zio->io_spa, SCL_VDEV, FTAG, RW_READER); + vdev_t *tvd = vdev_lookup_top(zio->io_spa, + DVA_GET_VDEV(&bp->blk_dva[0])); + if (tvd->vdev_ops == &vdev_indirect_ops) { + spa_config_exit(zio->io_spa, SCL_VDEV, FTAG); + return (zio); + } + spa_config_exit(zio->io_spa, SCL_VDEV, FTAG); + *bp = *bp_orig; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; zio->io_flags |= ZIO_FLAG_NOPWRITE; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 22fc26212c0..3f82676ef21 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -758,7 +758,7 @@ tags = ['functional', 'refreserv'] pre = tests = ['removal_all_vdev', 'removal_check_space', 'removal_condense_export', 'removal_multiple_indirection', - 'removal_remap', 'removal_remap_deadlists', + 'removal_remap', 'removal_nopwrite', 'removal_remap_deadlists', 'removal_resume_export', 'removal_sanity', 'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup', 'removal_with_errors', 'removal_with_export', diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am index ba42b899aca..df92e0b5ed4 100644 --- a/tests/zfs-tests/tests/functional/removal/Makefile.am +++ b/tests/zfs-tests/tests/functional/removal/Makefile.am @@ -18,7 +18,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal dist_pkgdata_SCRIPTS = \ cleanup.ksh removal_all_vdev.ksh removal_check_space.ksh \ removal_condense_export.ksh removal_multiple_indirection.ksh \ - removal_remap_deadlists.ksh removal_remap.ksh \ + removal_remap_deadlists.ksh removal_nopwrite.ksh removal_remap.ksh \ removal_reservation.ksh removal_resume_export.ksh \ removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \ removal_with_dedup.ksh removal_with_errors.ksh \ diff --git a/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh new file mode 100755 index 
00000000000..cb8bd6b810c --- /dev/null +++ b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh @@ -0,0 +1,87 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib +. $STF_SUITE/tests/functional/nopwrite/nopwrite.shlib + +default_setup_noexit "$DISKS" +log_onexit default_cleanup_noexit +BLOCKSIZE=8192 + +origin="$TESTPOOL/$TESTFS" + +log_must zfs set compress=on $origin +log_must zfs set checksum=edonr $origin + +log_must zfs set recordsize=8k $origin +dd if=/dev/urandom of=$TESTDIR/file_8k bs=1024k count=$MEGS oflag=sync \ + conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file failed." +log_must zfs set recordsize=128k $origin +dd if=/dev/urandom of=$TESTDIR/file_128k bs=1024k count=$MEGS oflag=sync \ + conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file failed." + +zfs snapshot $origin@a || log_fail "zfs snap failed" +log_must zfs clone $origin@a $origin/clone + +# +# Verify that nopwrites work prior to removal +# +log_must zfs set recordsize=8k $origin/clone +dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_must verify_nopwrite $origin $origin@a $origin/clone + +log_must zfs set recordsize=128k $origin/clone +dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." 
+log_must verify_nopwrite $origin $origin@a $origin/clone + +# +# Remove a device before testing nopwrites again +# +log_must zpool remove $TESTPOOL $REMOVEDISK +log_must wait_for_removal $TESTPOOL +log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK + +# +# Normally, we expect nopwrites to avoid allocating new blocks, but +# after a device has been removed the DVAs will get remapped when +# a L0's indirect block is written. This will negate the effects +# of nopwrite and should result in new allocations. +# + +# +# Perform a direct zil nopwrite test +# +log_must zfs set recordsize=8k $origin/clone +dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_mustnot verify_nopwrite $origin $origin@a $origin/clone + +# +# Perform an indirect zil nopwrite test +# +log_must zfs set recordsize=128k $origin/clone +dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_mustnot verify_nopwrite $origin $origin@a $origin/clone + +log_pass "Remove works with nopwrite." From 093bb6446120c50a7109ed7e7a0f2e76730b3160 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Wed, 3 Jul 2019 00:25:23 +0900 Subject: [PATCH 089/325] Don't use d_path() for automount mount point for chroot'd process Chroot'd process fails to automount snapshots due to realpath(3) failure in mount.zfs(8). Construct a mount point path from sb of the ctldir inode and dirent name, instead of from d_path(), so that chroot'd process doesn't get affected by its view of fs.
Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #8903 Closes #8966 --- module/zfs/zfs_ctldir.c | 41 +++++++---------------------------------- 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index c8071a7c215..aa50646fef8 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -703,37 +703,6 @@ zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, return (0); } -/* - * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" - */ -static int -zfsctl_snapshot_path(struct path *path, int len, char *full_path) -{ - char *path_buffer, *path_ptr; - int path_len, error = 0; - - path_buffer = kmem_alloc(len, KM_SLEEP); - - path_ptr = d_path(path, path_buffer, len); - if (IS_ERR(path_ptr)) { - error = -PTR_ERR(path_ptr); - goto out; - } - - path_len = path_buffer + len - 1 - path_ptr; - if (path_len > len) { - error = SET_ERROR(EFAULT); - goto out; - } - - memcpy(full_path, path_ptr, path_len); - full_path[path_len] = '\0'; -out: - kmem_free(path_buffer, len); - - return (error); -} - /* * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" */ @@ -1077,9 +1046,13 @@ zfsctl_snapshot_mount(struct path *path, int flags) if (error) goto error; - error = zfsctl_snapshot_path(path, MAXPATHLEN, full_path); - if (error) - goto error; + /* + * Construct a mount point path from sb of the ctldir inode and dirent + * name, instead of from d_path(), so that chroot'd process doesn't fail + * on mount.zfs(8). + */ + snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", + zfsvfs->z_vfs->vfs_mntpoint, dname(dentry)); /* * Multiple concurrent automounts of a snapshot are never allowed. From 9e09826b33092bfe41dce14e098b2d2f4931da2f Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Tue, 2 Jul 2019 20:30:00 -0400 Subject: [PATCH 090/325] Fix error text for EINVAL in zfs_receive_one() This small patch fixes the EINVAL case for zfs_receive_one(). 
A missing 'else' has been added to the two possible cases, which will ensure the intended error message is printed. Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Signed-off-by: Tom Caputi Closes #8977 --- lib/libzfs/libzfs_sendrecv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 052b96b9b65..0d3853e0a1c 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -4418,14 +4418,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '@'; break; case EINVAL: - if (flags->resumable) + if (flags->resumable) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "kernel modules must be upgraded to " "receive this stream.")); - if (embedded && !raw) + } else if (embedded && !raw) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incompatible embedded data stream " "feature with encrypted receive.")); + } (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: From 7a03d7c73cec63e3c3e771c8cf34d8876a0f0532 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 3 Jul 2019 13:01:54 -0700 Subject: [PATCH 091/325] Check b_freeze_cksum under ZFS_DEBUG_MODIFY conditional The b_freeze_cksum field can only have data when ZFS_DEBUG_MODIFY is set. Therefore, the EQUIV check must be wrapped accordingly. For the same reason the ASSERT in arc_buf_fill() in unsafe. However, since it's largely redundant it has simply been removed. Reviewed-by: George Wilson Reviewed-by: Allan Jude Reviewed-by: Igor Kozhukhov Signed-off-by: Brian Behlendorf Closes #8979 --- module/zfs/arc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 720365c4a93..f125ca6a4d1 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1872,7 +1872,8 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) * There were no decompressed bufs, so there should not be a * checksum on the hdr either. 
*/ - EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); + if (zfs_flags & ZFS_DEBUG_MODIFY) + EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); return (copied); } @@ -2253,7 +2254,6 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, */ if (arc_buf_try_copy_decompressed_data(buf)) { /* Skip byteswapping and checksumming (already done) */ - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL); return (0); } else { error = zio_decompress_data(HDR_GET_COMPRESS(hdr), From 14a11bf2f6052413cdaa5cf8193d16ce8f2fa388 Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Wed, 3 Jul 2019 16:05:02 -0400 Subject: [PATCH 092/325] Improve "Unable to automount" error message. Having the mountpoint and dataset name both in the message made it confusing to read. Additionally, convert this to a zfs_dbgmsg rather than sending it to the console. Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Paul Zuchowski Closes #8959 --- module/zfs/zfs_ctldir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index aa50646fef8..52314f4e1bd 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -30,6 +30,7 @@ * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2018 George Melikov. All Rights Reserved. + * Copyright (c) 2019 Datto, Inc. All rights reserved. 
*/ /* @@ -1081,8 +1082,8 @@ zfsctl_snapshot_mount(struct path *path, int flags) error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (error) { if (!(error & MOUNT_BUSY << 8)) { - cmn_err(CE_WARN, "Unable to automount %s/%s: %d", - full_path, full_name, error); + zfs_dbgmsg("Unable to automount %s error=%d", + full_path, error); error = SET_ERROR(EISDIR); } else { /* From 1f72a18f59d73f6e09ea052fb51cc7e19eaa3250 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Fri, 5 Jul 2019 19:53:14 -0400 Subject: [PATCH 093/325] Remove VERIFY from dsl_dataset_crypt_stats() This patch fixes an issue where dsl_dataset_crypt_stats() would VERIFY that it was able to hold the encryption root. This function should instead silently continue without populating the related field in the nvlist, as is the convention for this code. Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #8976 --- module/zfs/dsl_crypt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 568fe7aa326..24711227ba5 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -2624,11 +2624,13 @@ dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv) } if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { - VERIFY0(dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, - &enc_root)); - dsl_dir_name(enc_root, buf); - dsl_dir_rele(enc_root, FTAG); - dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); + if (dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, + &enc_root) == 0) { + dsl_dir_name(enc_root, buf); + dsl_dir_rele(enc_root, FTAG); + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_ENCRYPTION_ROOT, buf); + } } } From 2ac233c633e9bce36df8e7a3d7501cf4a0e227bb Mon Sep 17 00:00:00 2001 From: loli10K Date: Tue, 9 Jul 2019 18:28:05 +0200 Subject: [PATCH 094/325] Fix dracut Debian/Ubuntu packaging This commit ensures make(1) targets that build .deb packages fail if alien(1) can't convert all .rpm files; 
additionally it also updates the zfs-dracut package name which was changed to "noarch" in ca4e5a7. Reviewed-by: Neal Gompa Reviewed-by: Brian Behlendorf Reviewed-by: Olaf Faaland Signed-off-by: loli10K Closes #8990 Closes #8991 --- config/deb.am | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/deb.am b/config/deb.am index e405547aa94..83059a92349 100644 --- a/config/deb.am +++ b/config/deb.am @@ -20,7 +20,7 @@ deb-kmod: deb-local rpm-kmod arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \ debarch=`$(DPKG) --print-architecture`; \ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \ - fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1; \ + fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \ $(RM) $$pkg1 @@ -30,7 +30,7 @@ deb-dkms: deb-local rpm-dkms arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \ debarch=`$(DPKG) --print-architecture`; \ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \ - fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1; \ + fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \ $(RM) $$pkg1 deb-utils: deb-local rpm-utils @@ -45,7 +45,7 @@ deb-utils: deb-local rpm-utils pkg5=libzpool2-$${version}.$${arch}.rpm; \ pkg6=libzfs2-devel-$${version}.$${arch}.rpm; \ pkg7=$${name}-test-$${version}.$${arch}.rpm; \ - pkg8=$${name}-dracut-$${version}.$${arch}.rpm; \ + pkg8=$${name}-dracut-$${version}.noarch.rpm; \ pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \ pkg10=`ls python*-pyzfs-$${version}* | tail -1`; \ ## Arguments need to be passed to dh_shlibdeps. 
Alien provides no mechanism @@ -63,7 +63,7 @@ deb-utils: deb-local rpm-utils env PATH=$${path_prepend}:$${PATH} \ fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch \ $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ - $$pkg8 $$pkg9 $$pkg10; \ + $$pkg8 $$pkg9 $$pkg10 || exit 1; \ $(RM) $${path_prepend}/dh_shlibdeps; \ rmdir $${path_prepend}; \ $(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ From ccd8125e450c2968b2878fd887da7fac5b9a49f1 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Wed, 10 Jul 2019 01:31:46 +0900 Subject: [PATCH 095/325] Fix race in parallel mount's thread dispatching algorithm Strategy of parallel mount is as follows. 1) Initial thread dispatching is to select sets of mount points that don't have dependencies on other sets, hence threads can/should run lock-less and shouldn't race with other threads for other sets. Each thread dispatched corresponds to top level directory which may or may not have datasets to be mounted on sub directories. 2) Subsequent recursive thread dispatching for each thread from 1) is to mount datasets for each set of mount points. The mount points within each set have dependencies (i.e. child directories), so child directories are processed only after parent directory completes. The problem is that the initial thread dispatching in zfs_foreach_mountpoint() can be multi-threaded when it needs to be single-threaded, and this puts threads under race condition. This race appeared as mount/unmount issues on ZoL for ZoL having different timing regarding mount(2) execution due to fork(2)/exec(2) of mount(8). `zfs unmount -a` which expects proper mount order can't unmount if the mounts were reordered by the race condition. There are currently two known patterns of input list `handles` in `zfs_foreach_mountpoint(..,handles,..)` which cause the race condition. 1) #8833 case where input is `/a /a /a/b` after sorting. 
The problem is that libzfs_path_contains() can't correctly handle an input list with two same top level directories. There is a race between two POSIX threads A and B, * ThreadA for "/a" for test1 and "/a/b" * ThreadB for "/a" for test0/a and in case of #8833, ThreadA won the race. Two threads were created because "/a" wasn't considered as `"/a" contains "/a"`. 2) #8450 case where input is `/ /var/data /var/data/test` after sorting. The problem is that libzfs_path_contains() can't correctly handle an input list containing "/". There is a race between two POSIX threads A and B, * ThreadA for "/" and "/var/data/test" * ThreadB for "/var/data" and in case of #8450, ThreadA won the race. Two threads were created because "/var/data" wasn't considered as `"/" contains "/var/data"`. In other words, if there is (at least one) "/" in the input list, the initial thread dispatching must be single-threaded since every directory is a child of "/", meaning they all directly or indirectly depend on "/". In both cases, the first non_descendant_idx() call fails to correctly determine "path1-contains-path2", and as a result the initial thread dispatching creates another thread when it needs to be single-threaded. Fix a conditional in libzfs_path_contains() to consider above two. 
Reviewed-by: Brian Behlendorf Reviewed by: Sebastien Roy Signed-off-by: Tomohiro Kusumi Closes #8450 Closes #8833 Closes #8878 --- lib/libzfs/libzfs_mount.c | 6 +- tests/runfiles/linux.run | 3 +- .../functional/cli_root/zfs_mount/Makefile.am | 1 + .../cli_root/zfs_mount/zfs_mount_test_race.sh | 116 ++++++++++++++++++ 4 files changed, 123 insertions(+), 3 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 649c232aa3e..d62801cfdac 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -1302,12 +1302,14 @@ mountpoint_cmp(const void *arga, const void *argb) } /* - * Return true if path2 is a child of path1. + * Return true if path2 is a child of path1 or path2 equals path1 or + * path1 is "/" (path2 is always a child of "/"). */ static boolean_t libzfs_path_contains(const char *path1, const char *path2) { - return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'); + return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 || + (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/')); } /* diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 3f82676ef21..27e36b594ab 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -182,7 +182,8 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', - 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] + 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', + 'zfs_mount_test_race'] tags = ['functional', 'cli_root', 'zfs_mount'] [tests/functional/cli_root/zfs_program] diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am 
b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am index b2de98934b7..c208a1c378d 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am @@ -19,6 +19,7 @@ dist_pkgdata_SCRIPTS = \ zfs_mount_all_mountpoints.ksh \ zfs_mount_encrypted.ksh \ zfs_mount_remount.ksh \ + zfs_mount_test_race.sh \ zfs_multi_mount.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh new file mode 100755 index 00000000000..404770b2727 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh @@ -0,0 +1,116 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.cfg + +# +# DESCRIPTION: +# Verify parallel mount ordering is consistent. +# +# There was a bug in initial thread dispatching algorithm which put threads +# under race condition which resulted in undefined mount order. The purpose +# of this test is to verify `zfs unmount -a` succeeds (not `zfs mount -a` +# succeeds, it always does) after `zfs mount -a`, which could fail if threads +# race. See github.com/zfsonlinux/zfs/issues/{8450,8833,8878} for details. +# +# STRATEGY: +# 1. Create pools and filesystems. +# 2. Set same mount point for >1 datasets. +# 3. Unmount all datasets. +# 4. Mount all datasets. +# 5. 
Unmount all datasets (verify this succeeds). +# + +verify_runnable "both" + +TMPDIR=${TMPDIR:-$TEST_BASE_DIR} +MNTPT=$TMPDIR/zfs_mount_test_race_mntpt +DISK1="$TMPDIR/zfs_mount_test_race_disk1" +DISK2="$TMPDIR/zfs_mount_test_race_disk2" + +TESTPOOL1=zfs_mount_test_race_tp1 +TESTPOOL2=zfs_mount_test_race_tp2 + +export __ZFS_POOL_RESTRICT="$TESTPOOL1 $TESTPOOL2" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +function cleanup +{ + zpool destroy $TESTPOOL1 + zpool destroy $TESTPOOL2 + rm -rf $MNTPT + rm -rf /$TESTPOOL1 + rm -rf /$TESTPOOL2 + rm -f $DISK1 + rm -f $DISK2 + export __ZFS_POOL_RESTRICT="$TESTPOOL1 $TESTPOOL2" + log_must zfs $mountall + unset __ZFS_POOL_RESTRICT +} +log_onexit cleanup + +log_note "Verify parallel mount ordering is consistent" + +log_must truncate -s $MINVDEVSIZE $DISK1 +log_must truncate -s $MINVDEVSIZE $DISK2 + +log_must zpool create -f $TESTPOOL1 $DISK1 +log_must zpool create -f $TESTPOOL2 $DISK2 + +log_must zfs create $TESTPOOL1/$TESTFS1 +log_must zfs create $TESTPOOL2/$TESTFS2 + +log_must zfs set mountpoint=none $TESTPOOL1 +log_must zfs set mountpoint=$MNTPT $TESTPOOL1/$TESTFS1 + +# Note that unmount can fail (due to race condition on `zfs mount -a`) with or +# without `canmount=off`. The race has nothing to do with canmount property, +# but turn it off for convenience of mount layout used in this test case. +log_must zfs set canmount=off $TESTPOOL2 +log_must zfs set mountpoint=$MNTPT $TESTPOOL2 + +# At this point, layout of datasets in two pools will look like below. +# Previously, on next `zfs mount -a`, pthreads assigned to TESTFS1 and TESTFS2 +# could race, and TESTFS2 usually (actually always) won in ZoL. Note that the +# problem is how two or more threads could initially be assigned to the same +# top level directory, not this specific layout. This layout is just an example +# that can reproduce race, and is also the layout reported in #8833. 
+# +# NAME MOUNTED MOUNTPOINT +# ---------------------------------------------- +# /$TESTPOOL1 no none +# /$TESTPOOL1/$TESTFS1 yes $MNTPT +# /$TESTPOOL2 no $MNTPT +# /$TESTPOOL2/$TESTFS2 yes $MNTPT/$TESTFS2 + +# Apparently two datasets must be mounted. +log_must ismounted $TESTPOOL1/$TESTFS1 +log_must ismounted $TESTPOOL2/$TESTFS2 +# This unmount always succeeds, because potential race hasn't happened yet. +log_must zfs unmount -a +# This mount always succeeds, whether threads are under race condition or not. +log_must zfs mount -a + +# Verify datasets are mounted (TESTFS2 fails if the race broke mount order). +log_must ismounted $TESTPOOL1/$TESTFS1 +log_must ismounted $TESTPOOL2/$TESTFS2 +# Verify unmount succeeds (fails if the race broke mount order). +log_must zfs unmount -a + +log_pass "Verify parallel mount ordering is consistent passed" From c3a3c5a30fea98f640e23b0f3c2c10d5606ba9fc Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 9 Jul 2019 15:02:40 -0500 Subject: [PATCH 096/325] pkg-utils python sitelib for SLES15 Use python -Esc to set __python_sitelib. Reviewed-by: Neal Gompa Reviewed-by: Brian Behlendorf Signed-off-by: Shaun Tancheff Closes #8969 --- rpm/generic/zfs.spec.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 0b16cd0e886..0864a72a115 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -99,6 +99,7 @@ %define __python_cffi_pkg python%{__python_pkg_version}-cffi %define __python_setuptools_pkg python%{__python_pkg_version}-setuptools %endif +%define __python_sitelib %(%{__python} -Esc "from distutils.sysconfig import get_python_lib; print(get_python_lib())") # By default python-pyzfs is enabled, with the exception of # RHEL 6 which by default uses Python 2.6 which is too old. 
@@ -474,8 +475,8 @@ systemctl --system daemon-reload >/dev/null || true %doc contrib/pyzfs/README %doc contrib/pyzfs/LICENSE %defattr(-,root,root,-) -%{python_sitelib}/libzfs_core/* -%{python_sitelib}/pyzfs* +%{__python_sitelib}/libzfs_core/* +%{__python_sitelib}/pyzfs* %endif %if 0%{?_initramfs} From 6e19cc77cfd10a8587181f57ef4f9d7a1a7bc5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?= Date: Wed, 10 Jul 2019 20:44:52 +0200 Subject: [PATCH 097/325] Fix ZTS killed processes detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit log_neg_expect was using the wrong exit status to detect if a process got killed by SIGSEGV or SIGBUS, resulting in false positives. Reviewed-by: loli10K Reviewed by: John Kennedy Reviewed by: Brian Behlendorf Signed-off-by: Attila Fülöp Closes #9003 --- tests/test-runner/include/logapi.shlib | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-runner/include/logapi.shlib b/tests/test-runner/include/logapi.shlib index 32fc0061618..cd7982a94a0 100644 --- a/tests/test-runner/include/logapi.shlib +++ b/tests/test-runner/include/logapi.shlib @@ -198,12 +198,12 @@ function log_neg_expect elif (( $status == 127 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (File not found)" - # bus error - core dump - elif (( $status == 138 )); then + # bus error - core dump (256+signal, SIGBUS=7) + elif (( $status == 263 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (Bus Error)" - # segmentation violation - core dump - elif (( $status == 139 )); then + # segmentation violation - core dump (256+signal, SIGSEGV=11) + elif (( $status == 267 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (SEGV)" else From cf966cb19ae63f65c518678ce57642c716808ef6 Mon Sep 17 00:00:00 2001 From: Nick Mattis Date: Wed, 10 Jul 2019 18:54:49 -0400 Subject: [PATCH 098/325] Fixes: #8934 Large kmem_alloc Large 
allocation over the spl_kmem_alloc_warn value was being performed. Switched to vmem_alloc interface as specified for large allocations. Changed the subsequent frees to match. Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: nmattis Closes #8934 Closes #9011 --- module/zfs/vdev_indirect_births.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/module/zfs/vdev_indirect_births.c b/module/zfs/vdev_indirect_births.c index 1c44a64287d..99b83c39225 100644 --- a/module/zfs/vdev_indirect_births.c +++ b/module/zfs/vdev_indirect_births.c @@ -70,7 +70,7 @@ vdev_indirect_births_close(vdev_indirect_births_t *vib) if (vib->vib_phys->vib_count > 0) { uint64_t births_size = vdev_indirect_births_size_impl(vib); - kmem_free(vib->vib_entries, births_size); + vmem_free(vib->vib_entries, births_size); vib->vib_entries = NULL; } @@ -108,7 +108,7 @@ vdev_indirect_births_open(objset_t *os, uint64_t births_object) if (vib->vib_phys->vib_count > 0) { uint64_t births_size = vdev_indirect_births_size_impl(vib); - vib->vib_entries = kmem_alloc(births_size, KM_SLEEP); + vib->vib_entries = vmem_alloc(births_size, KM_SLEEP); VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0, births_size, vib->vib_entries, DMU_READ_PREFETCH)); } @@ -148,10 +148,10 @@ vdev_indirect_births_add_entry(vdev_indirect_births_t *vib, vib->vib_phys->vib_count++; new_size = vdev_indirect_births_size_impl(vib); - new_entries = kmem_alloc(new_size, KM_SLEEP); + new_entries = vmem_alloc(new_size, KM_SLEEP); if (old_size > 0) { bcopy(vib->vib_entries, new_entries, old_size); - kmem_free(vib->vib_entries, old_size); + vmem_free(vib->vib_entries, old_size); } new_entries[vib->vib_phys->vib_count - 1] = vibe; vib->vib_entries = new_entries; From 0a223246e124e68bbd2ee2cd7ddcd0bbcd6fa3a5 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Tue, 16 Jul 2019 05:57:56 +0900 Subject: [PATCH 099/325] Disable unused pathname::pn_path* (unneeded in Linux) struct pathname is originally from 
Solaris VFS, and it has been used in ZoL to merely call VOP from Linux VFS interface without API change, therefore pathname::pn_path* are unused and unneeded. Technically, struct pathname is a wrapper for C string in ZoL. Saves stack a bit on lookup and unlink. (#if0'd members instead of removing since comments refer to them.) Reviewed-by: Brian Behlendorf Reviewed-by: Richard Elling Reviewed-by: George Melikov Signed-off-by: Tomohiro Kusumi Closes #9025 --- include/sys/pathname.h | 2 ++ module/zfs/pathname.c | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/sys/pathname.h b/include/sys/pathname.h index 5db69b1784c..d79cc5c01af 100644 --- a/include/sys/pathname.h +++ b/include/sys/pathname.h @@ -54,8 +54,10 @@ extern "C" { */ typedef struct pathname { char *pn_buf; /* underlying storage */ +#if 0 /* unused in ZoL */ char *pn_path; /* remaining pathname */ size_t pn_pathlen; /* remaining length */ +#endif size_t pn_bufsize; /* total size of pn_buf */ } pathname_t; diff --git a/module/zfs/pathname.c b/module/zfs/pathname.c index e3e97c9bb36..4766762f37d 100644 --- a/module/zfs/pathname.c +++ b/module/zfs/pathname.c @@ -71,9 +71,12 @@ pn_alloc(struct pathname *pnp) void pn_alloc_sz(struct pathname *pnp, size_t sz) { - pnp->pn_path = pnp->pn_buf = kmem_alloc(sz, KM_SLEEP); - pnp->pn_pathlen = 0; + pnp->pn_buf = kmem_alloc(sz, KM_SLEEP); pnp->pn_bufsize = sz; +#if 0 /* unused in ZoL */ + pnp->pn_path = pnp->pn_buf; + pnp->pn_pathlen = 0; +#endif } /* @@ -84,6 +87,10 @@ pn_free(struct pathname *pnp) { /* pn_bufsize is usually MAXPATHLEN, but may not be */ kmem_free(pnp->pn_buf, pnp->pn_bufsize); - pnp->pn_path = pnp->pn_buf = NULL; - pnp->pn_pathlen = pnp->pn_bufsize = 0; + pnp->pn_buf = NULL; + pnp->pn_bufsize = 0; +#if 0 /* unused in ZoL */ + pnp->pn_path = NULL; + pnp->pn_pathlen = 0; +#endif } From 78831d42906436c93570a7181548faaf456eb60f Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Mon, 15 Jul 2019 16:08:42 -0700 Subject: 
[PATCH 100/325] Ensure dsl_destroy_head() decrypts objsets This patch corrects a small issue where the dsl_destroy_head() code that runs when the async_destroy feature is disabled would not properly decrypt the dataset before beginning processing. If the dataset is not able to be decrypted, the optimization code now simply does not run and the dataset is completely destroyed in the DSL sync task. Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #9021 --- module/zfs/dsl_destroy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 465b3dfac89..a01abfa0038 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -1059,9 +1059,10 @@ dsl_destroy_head(const char *name) /* * Head deletion is processed in one txg on old pools; * remove the objects from open context so that the txg sync - * is not too long. + * is not too long. This optimization can only work for + * encrypted datasets if the wrapping key is loaded. */ - error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_TRUE, FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = @@ -1073,7 +1074,7 @@ dsl_destroy_head(const char *name) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); - dmu_objset_disown(os, B_FALSE, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); } } From d751b12a9d927d71a1c584be25bf705bb8decda2 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 15 Jul 2019 16:11:55 -0700 Subject: [PATCH 101/325] Export dnode symbols External consumers such as Lustre require access to the dnode interfaces in order to correctly manipulate dnodes. 
Reviewed-by: James Simmons Reviewed-by: Olaf Faaland Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Issue #8994 Closes #9027 --- module/zfs/dnode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index c06f614e199..5fd473303d7 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -2483,3 +2483,13 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, return (error); } + +#if defined(_KERNEL) +EXPORT_SYMBOL(dnode_hold); +EXPORT_SYMBOL(dnode_rele); +EXPORT_SYMBOL(dnode_set_nlevels); +EXPORT_SYMBOL(dnode_set_blksz); +EXPORT_SYMBOL(dnode_free_range); +EXPORT_SYMBOL(dnode_evict_dbufs); +EXPORT_SYMBOL(dnode_evict_bonus); +#endif From 73e50a7d5ddb20e20fd1eab23f00f26f85bd717a Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Tue, 16 Jul 2019 08:26:52 +0900 Subject: [PATCH 102/325] Drop redundant POSIX ACL check in zpl_init_acl() ZFS_ACLTYPE_POSIXACL has already been tested in zpl_init_acl(), so no need to test again on POSIX ACL access. 
Reviewed by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Tomohiro Kusumi Closes #9009 --- module/zfs/zpl_xattr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index 8ee6e9a97f0..95523f28e3b 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -1130,12 +1130,9 @@ zpl_init_acl(struct inode *ip, struct inode *dir) return (0); if (!S_ISLNK(ip->i_mode)) { - if (ITOZSB(ip)->z_acl_type == ZFS_ACLTYPE_POSIXACL) { - acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return (PTR_ERR(acl)); - } - + acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return (PTR_ERR(acl)); if (!acl) { ip->i_mode &= ~current_umask(); ip->i_ctime = current_time(ip); @@ -1144,7 +1141,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir) } } - if ((ITOZSB(ip)->z_acl_type == ZFS_ACLTYPE_POSIXACL) && acl) { + if (acl) { umode_t mode; if (S_ISDIR(ip->i_mode)) { From af7a5672c3d1ef17d352627e64c24d762da919e3 Mon Sep 17 00:00:00 2001 From: Antonio Russo Date: Sun, 2 Jun 2019 08:57:10 -0400 Subject: [PATCH 103/325] systemd encryption key support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify zfs-mount-generator to produce a dependency on new zfs-import-key-*.service units, dynamically created at boot to call zfs load-key for the encryption root, before attempting to mount any encrypted datasets. These units are created by zfs-mount-generator, and RequiresMountsFor on the keyfile, if present, or call systemd-ask-password if a passphrase is requested. This patch includes suggestions from @Fabian-Gruenbichler, @ryanjaeb and @rlaager, as well an adaptation of @rlaager's script to retry on incorrect password entry. 
Reviewed-by: Richard Laager Reviewed-by: Fabian Grünbichler Reviewed-by: Brian Behlendorf Signed-off-by: Antonio Russo Closes #8750 Closes #8848 --- .../zed.d/history_event-zfs-list-cacher.sh.in | 4 +- .../system-generators/zfs-mount-generator.in | 54 ++++++++++++++++++- man/man8/zfs-mount-generator.8.in | 2 +- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in index c1513cf3a01..6d0f44ab326 100755 --- a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in +++ b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in @@ -47,7 +47,7 @@ case "${ZEVENT_HISTORY_INTERNAL_NAME}" in # Only act if one of the tracked properties is altered. case "${ZEVENT_HISTORY_INTERNAL_STR%%=*}" in canmount|mountpoint|atime|relatime|devices|exec| \ - readonly|setuid|nbmand) ;; + readonly|setuid|nbmand|encroot|keylocation) ;; *) exit 0 ;; esac ;; @@ -62,7 +62,7 @@ zed_lock zfs-list trap abort_alter EXIT PROPS="name,mountpoint,canmount,atime,relatime,devices,exec,readonly" -PROPS="${PROPS},setuid,nbmand" +PROPS="${PROPS},setuid,nbmand,encroot,keylocation" "${ZFS}" list -H -t filesystem -o $PROPS -r "${ZEVENT_POOL}" > "${FSLIST_TMP}" diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 5428eb25d92..ae208c965f9 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -71,6 +71,8 @@ process_line() { p_readonly="${8}" p_setuid="${9}" p_nbmand="${10}" + p_encroot="${11}" + p_keyloc="${12}" # Check for canmount=off . 
if [ "${p_canmount}" = "off" ] ; then @@ -168,6 +170,54 @@ process_line() { "${dataset}" >/dev/kmsg fi + # Minimal pre-requisites to mount a ZFS dataset + wants="zfs-import.target" + if [ -n "${p_encroot}" ] && + [ "${p_encroot}" != "-" ] ; then + keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service" + if [ "${p_encroot}" = "${dataset}" ] ; then + pathdep="" + if [ "${p_keyloc%%://*}" = "file" ] ; then + pathdep="RequiresMountsFor='${p_keyloc#file://}'" + keyloadcmd="@sbindir@/zfs load-key '${dataset}'" + elif [ "${p_keyloc}" = "prompt" ] ; then + keyloadcmd="sh -c 'set -eu;"\ +"count=0;"\ +"while [ \$\$count -lt 3 ];do"\ +" systemd-ask-password --id=\"zfs:${dataset}\""\ +" \"Enter passphrase for ${dataset}:\"|"\ +" @sbindir@/zfs load-key \"${dataset}\" && exit 0;"\ +" count=\$\$((count + 1));"\ +"done;"\ +"exit 1'" + else + printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ + "${dataset}" >/dev/kmsg + fi + cat > "${dest_norm}/${keyloadunit}" << EOF +# Automatically generated by zfs-mount-generator + +[Unit] +Description=Load ZFS key for ${dataset} +SourcePath=${cachefile} +Documentation=man:zfs-mount-generator(8) +DefaultDependencies=no +Wants=${wants} +After=${wants} +${pathdep} + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=${keyloadcmd} +ExecStop=@sbindir@/zfs unload-key '${dataset}' +EOF + fi + # Update the dependencies for the mount file to require the + # key-loading unit. + wants="${wants},${keyloadunit}" + fi + # If the mountpoint has already been created, give it precedence. 
if [ -e "${dest_norm}/${mountfile}" ] ; then printf 'zfs-mount-generator: %s already exists\n' "${mountfile}" \ @@ -183,8 +233,8 @@ process_line() { SourcePath=${cachefile} Documentation=man:zfs-mount-generator(8) Before=local-fs.target zfs-mount.service -After=zfs-import.target -Wants=zfs-import.target +After=${wants} +Wants=${wants} [Mount] Where=${p_mountpoint} diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in index 79720601d62..48e4e2dfac2 100644 --- a/man/man8/zfs-mount-generator.8.in +++ b/man/man8/zfs-mount-generator.8.in @@ -26,7 +26,7 @@ information on ZFS mountpoints must be stored separately. The output of the command .PP .RS 4 -zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand +zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand,encroot,keylocation .RE .PP for datasets that should be mounted by systemd, should be kept From 446d08fba4f2a795a278906167157bb6378176a1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 16 Jul 2019 14:14:12 -0700 Subject: [PATCH 104/325] Fix get_special_prop() build failure The cast of the size_t returned by strlcpy() to a uint64_t by the VERIFY3U can result in a build failure when CONFIG_FORTIFY_SOURCE is set. This is due to the additional hardening. Since the token is expected to always fit in strval the VERIFY3U has been removed. If somehow it doesn't, it will still be safely truncated. 
Reviewed-by: Tony Hutter Reviewed-by: Don Brady Signed-off-by: Brian Behlendorf Issue #8999 Closes #9020 --- module/zfs/zcp_get.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c index ed98f0d1025..0a5f0b8242a 100644 --- a/module/zfs/zcp_get.c +++ b/module/zfs/zcp_get.c @@ -423,13 +423,11 @@ get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname, case ZFS_PROP_RECEIVE_RESUME_TOKEN: { char *token = get_receive_resume_stats_impl(ds); - VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN), - <, ZAP_MAXVALUELEN); + (void) strlcpy(strval, token, ZAP_MAXVALUELEN); if (strcmp(strval, "") == 0) { char *childval = get_child_receive_stats(ds); - VERIFY3U(strlcpy(strval, childval, ZAP_MAXVALUELEN), - <, ZAP_MAXVALUELEN); + (void) strlcpy(strval, childval, ZAP_MAXVALUELEN); if (strcmp(strval, "") == 0) error = ENOENT; From 984bfb373fe7816e7c1b3ea0bf3fa937bc34d5d8 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 16 Jul 2019 17:22:31 -0700 Subject: [PATCH 105/325] Minor style cleanup Resolve an assortment of style inconsistencies including use of white space, typos, capitalization, and line wrapping. There is no functional change. 
Reviewed-by: Tony Hutter Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #9030 --- config/kernel-fpu.m4 | 9 ++++-- include/linux/simd_aarch64.h | 6 ++-- include/linux/simd_x86.h | 48 +++++++++++++++-------------- module/icp/algs/aes/aes_impl.c | 11 +++++-- module/icp/algs/modes/gcm.c | 10 +++--- module/icp/include/aes/aes_impl.h | 2 +- module/icp/include/modes/gcm_impl.h | 4 +-- module/spl/spl-thread.c | 3 +- module/zcommon/zfs_fletcher.c | 6 ++-- 9 files changed, 57 insertions(+), 42 deletions(-) diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index 5fff79a74c7..ebb02fb09a2 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -18,7 +18,8 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ #include ],[ ],[ - AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, [kernel has asm/fpu/api.h]) + AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, + [kernel has asm/fpu/api.h]) AC_MSG_RESULT(asm/fpu/api.h) ],[ AC_MSG_RESULT(i387.h & xcr.h) @@ -39,8 +40,10 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ kernel_fpu_end(); ], [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [ AC_MSG_RESULT(kernel_fpu_*) - AC_DEFINE(HAVE_KERNEL_FPU, 1, [kernel has kernel_fpu_* functions]) - AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) + AC_DEFINE(HAVE_KERNEL_FPU, 1, + [kernel has kernel_fpu_* functions]) + AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, + [kernel exports FPU functions]) ],[ ZFS_LINUX_TRY_COMPILE_SYMBOL([ #include diff --git a/include/linux/simd_aarch64.h b/include/linux/simd_aarch64.h index 155ef620559..56153a16072 100644 --- a/include/linux/simd_aarch64.h +++ b/include/linux/simd_aarch64.h @@ -26,8 +26,10 @@ * USER API: * * Kernel fpu methods: - * kfpu_begin() - * kfpu_end() + * kfpu_allowed() + * kfpu_initialize() + * kfpu_begin() + * kfpu_end() */ #ifndef _SIMD_AARCH64_H diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h index 12cd7467788..0489bfaa3a7 100644 --- a/include/linux/simd_x86.h +++ b/include/linux/simd_x86.h @@ -26,8 +26,10 @@ * USER API: * * Kernel 
fpu methods: - * kfpu_begin() - * kfpu_end() + * kfpu_allowed() + * kfpu_initialize() + * kfpu_begin() + * kfpu_end() * * SIMD support: * @@ -37,31 +39,31 @@ * all relevant feature test functions should be called. * * Supported features: - * zfs_sse_available() - * zfs_sse2_available() - * zfs_sse3_available() - * zfs_ssse3_available() - * zfs_sse4_1_available() - * zfs_sse4_2_available() + * zfs_sse_available() + * zfs_sse2_available() + * zfs_sse3_available() + * zfs_ssse3_available() + * zfs_sse4_1_available() + * zfs_sse4_2_available() * - * zfs_avx_available() - * zfs_avx2_available() + * zfs_avx_available() + * zfs_avx2_available() * - * zfs_bmi1_available() - * zfs_bmi2_available() + * zfs_bmi1_available() + * zfs_bmi2_available() * - * zfs_avx512f_available() - * zfs_avx512cd_available() - * zfs_avx512er_available() - * zfs_avx512pf_available() - * zfs_avx512bw_available() - * zfs_avx512dq_available() - * zfs_avx512vl_available() - * zfs_avx512ifma_available() - * zfs_avx512vbmi_available() + * zfs_avx512f_available() + * zfs_avx512cd_available() + * zfs_avx512er_available() + * zfs_avx512pf_available() + * zfs_avx512bw_available() + * zfs_avx512dq_available() + * zfs_avx512vl_available() + * zfs_avx512ifma_available() + * zfs_avx512vbmi_available() * * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers - * also add zfs_avx512vl_available() to feature check. + * also add zfs_avx512vl_available() to feature check. 
*/ #ifndef _SIMD_X86_H @@ -190,7 +192,7 @@ typedef struct cpuid_feature_desc { * Descriptions of supported instruction sets */ static const cpuid_feature_desc_t cpuid_features[] = { - [SSE] = {1U, 0U, 1U << 25, EDX }, + [SSE] = {1U, 0U, 1U << 25, EDX }, [SSE2] = {1U, 0U, 1U << 26, EDX }, [SSE3] = {1U, 0U, 1U << 0, ECX }, [SSSE3] = {1U, 0U, 1U << 9, ECX }, diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index e1505063574..36e0686a51c 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -303,16 +303,21 @@ aes_impl_init(void) } aes_supp_impl_cnt = c; - /* set fastest implementation. assume hardware accelerated is fastest */ + /* + * Set the fastest implementation given the assumption that the + * hardware accelerated version is the fastest. + */ #if defined(__x86_64) #if defined(HAVE_AES) - if (aes_aesni_impl.is_supported()) + if (aes_aesni_impl.is_supported()) { memcpy(&aes_fastest_impl, &aes_aesni_impl, sizeof (aes_fastest_impl)); - else + } else #endif + { memcpy(&aes_fastest_impl, &aes_x86_64_impl, sizeof (aes_fastest_impl)); + } #else memcpy(&aes_fastest_impl, &aes_generic_impl, sizeof (aes_fastest_impl)); diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 13bceef0f17..0afd957f0cf 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -646,7 +646,7 @@ const gcm_impl_ops_t *gcm_all_impl[] = { /* Indicate that benchmark has been completed */ static boolean_t gcm_impl_initialized = B_FALSE; -/* Select aes implementation */ +/* Select GCM implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) @@ -713,13 +713,15 @@ gcm_impl_init(void) /* set fastest implementation. 
assume hardware accelerated is fastest */ #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) - if (gcm_pclmulqdq_impl.is_supported()) + if (gcm_pclmulqdq_impl.is_supported()) { memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, sizeof (gcm_fastest_impl)); - else + } else #endif + { memcpy(&gcm_fastest_impl, &gcm_generic_impl, sizeof (gcm_fastest_impl)); + } strcpy(gcm_fastest_impl.name, "fastest"); @@ -742,7 +744,7 @@ static const struct { * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update - * icp_aes_impl. + * icp_gcm_impl. * * @val Name of gcm implementation to use * @param Unused. diff --git a/module/icp/include/aes/aes_impl.h b/module/icp/include/aes/aes_impl.h index 95cfddf9e0a..3a3de91cf6a 100644 --- a/module/icp/include/aes/aes_impl.h +++ b/module/icp/include/aes/aes_impl.h @@ -162,7 +162,7 @@ typedef enum aes_mech_type { #endif /* _AES_IMPL */ /* - * Methods used to define aes implementation + * Methods used to define AES implementation * * @aes_gen_f Key generation * @aes_enc_f Function encrypts one block diff --git a/module/icp/include/modes/gcm_impl.h b/module/icp/include/modes/gcm_impl.h index cbb904c059b..b78cc8aab01 100644 --- a/module/icp/include/modes/gcm_impl.h +++ b/module/icp/include/modes/gcm_impl.h @@ -37,12 +37,12 @@ extern "C" { #include /* - * Methods used to define gcm implementation + * Methods used to define GCM implementation * * @gcm_mul_f Perform carry-less multiplication * @gcm_will_work_f Function tests whether implementation will function */ -typedef void (*gcm_mul_f)(uint64_t *, uint64_t *, uint64_t *); +typedef void (*gcm_mul_f)(uint64_t *, uint64_t *, uint64_t *); typedef boolean_t (*gcm_will_work_f)(void); #define GCM_IMPL_NAME_MAX (16) diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c index d441ad65f31..0352a31ea83 100644 --- a/module/spl/spl-thread.c +++ 
b/module/spl/spl-thread.c @@ -153,8 +153,9 @@ spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...) if (PTR_ERR(tsk) == -ENOMEM) continue; return (NULL); - } else + } else { return (tsk); + } } while (1); } EXPORT_SYMBOL(spl_kthread_create); diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index 5a991ba6073..f712ce40c6e 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -592,8 +592,9 @@ fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) } #if defined(_KERNEL) -/* Fletcher 4 kstats */ - +/* + * Fletcher 4 kstats + */ static int fletcher_4_kstat_headers(char *buf, size_t size) { @@ -669,7 +670,6 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) zio_cksum_t zc; uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen); - fletcher_checksum_func_t *fletcher_4_test = native ? fletcher_4_native : fletcher_4_byteswap; From 2b9f73e5e6ae6f210b1b316bbd7bcbf8c6c62d61 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Thu, 18 Jul 2019 01:07:53 +0900 Subject: [PATCH 106/325] Use zfsctl_snapshot_hold() wrapper zfs_refcount_*() are to be wrapped by zfsctl_snapshot_*() in this file. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Tomohiro Kusumi Closes #9039 --- module/zfs/zfs_ctldir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 52314f4e1bd..8acbbb61ca9 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -192,7 +192,7 @@ static void zfsctl_snapshot_add(zfs_snapentry_t *se) { ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); avl_add(&zfs_snapshots_by_name, se); avl_add(&zfs_snapshots_by_objsetid, se); } @@ -269,7 +269,7 @@ zfsctl_snapshot_find_by_name(char *snapname) search.se_name = snapname; se = avl_find(&zfs_snapshots_by_name, &search, NULL); if (se) - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); return (se); } @@ -290,7 +290,7 @@ zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) search.se_objsetid = objsetid; se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); if (se) - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); return (se); } From ceb516ac2f4c2ddffcea8a6d282312dd941d3296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Wed, 17 Jul 2019 18:09:22 +0200 Subject: [PATCH 107/325] Add missing __GFP_HIGHMEM flag to vmalloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make use of __GFP_HIGHMEM flag in vmem_alloc, which is required for some 32-bit systems to make use of full available memory. While kernel versions >=4.12-rc1 add this flag implicitly, older kernels do not. 
Reviewed-by: Brian Behlendorf Signed-off-by: Sebastian Gottschall Signed-off-by: Michael Niewöhner Closes #9031 --- module/spl/spl-kmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 1fdb61e6fce..824b5e89f50 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -180,7 +180,8 @@ spl_kmem_alloc_impl(size_t size, int flags, int node) */ if ((size > spl_kmem_alloc_max) || use_vmem) { if (flags & KM_VMEM) { - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, + PAGE_KERNEL); } else { return (NULL); } From 4c98586dafab4518a7eea8db9a19271e99ae3110 Mon Sep 17 00:00:00 2001 From: jdike <52420226+jdike@users.noreply.github.com> Date: Wed, 17 Jul 2019 12:18:24 -0400 Subject: [PATCH 108/325] Fix lockdep recursive locking false positive in dbuf_destroy lockdep reports a possible recursive lock in dbuf_destroy. It is true that dbuf_destroy is acquiring the dn_dbufs_mtx on one dnode while holding it on another dnode. However, it is impossible for these to be the same dnode because, among other things,dbuf_destroy checks MUTEX_HELD before acquiring the mutex. This fix defines a class NESTED_SINGLE == 1 and changes that lock to call mutex_enter_nested with a subclass of NESTED_SINGLE. In order to make the userspace code compile, include/sys/zfs_context.h now defines mutex_enter_nested and NESTED_SINGLE. 
This is the lockdep report: [ 122.950921] ============================================ [ 122.950921] WARNING: possible recursive locking detected [ 122.950921] 4.19.29-4.19.0-debug-d69edad5368c1166 #1 Tainted: G O [ 122.950921] -------------------------------------------- [ 122.950921] dbu_evict/1457 is trying to acquire lock: [ 122.950921] 0000000083e9cbcf (&dn->dn_dbufs_mtx){+.+.}, at: dbuf_destroy+0x3c0/0xdb0 [zfs] [ 122.950921] but task is already holding lock: [ 122.950921] 0000000055523987 (&dn->dn_dbufs_mtx){+.+.}, at: dnode_evict_dbufs+0x90/0x740 [zfs] [ 122.950921] other info that might help us debug this: [ 122.950921] Possible unsafe locking scenario: [ 122.950921] CPU0 [ 122.950921] ---- [ 122.950921] lock(&dn->dn_dbufs_mtx); [ 122.950921] lock(&dn->dn_dbufs_mtx); [ 122.950921] *** DEADLOCK *** [ 122.950921] May be due to missing lock nesting notation [ 122.950921] 1 lock held by dbu_evict/1457: [ 122.950921] #0: 0000000055523987 (&dn->dn_dbufs_mtx){+.+.}, at: dnode_evict_dbufs+0x90/0x740 [zfs] [ 122.950921] stack backtrace: [ 122.950921] CPU: 0 PID: 1457 Comm: dbu_evict Tainted: G O 4.19.29-4.19.0-debug-d69edad5368c1166 #1 [ 122.950921] Hardware name: Supermicro H8SSL-I2/H8SSL-I2, BIOS 080011 03/13/2009 [ 122.950921] Call Trace: [ 122.950921] dump_stack+0x91/0xeb [ 122.950921] __lock_acquire+0x2ca7/0x4f10 [ 122.950921] lock_acquire+0x153/0x330 [ 122.950921] dbuf_destroy+0x3c0/0xdb0 [zfs] [ 122.950921] dbuf_evict_one+0x1cc/0x3d0 [zfs] [ 122.950921] dbuf_rele_and_unlock+0xb84/0xd60 [zfs] [ 122.950921] dnode_evict_dbufs+0x3a6/0x740 [zfs] [ 122.950921] dmu_objset_evict+0x7a/0x500 [zfs] [ 122.950921] dsl_dataset_evict_async+0x70/0x480 [zfs] [ 122.950921] taskq_thread+0x979/0x1480 [spl] [ 122.950921] kthread+0x2e7/0x3e0 [ 122.950921] ret_from_fork+0x27/0x50 Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Jeff Dike Closes #8984 --- include/spl/sys/mutex.h | 2 ++ include/sys/zfs_context.h | 2 ++ module/zfs/dbuf.c | 3 ++- 3 files changed, 
6 insertions(+), 1 deletion(-) diff --git a/include/spl/sys/mutex.h b/include/spl/sys/mutex.h index ed0cd4932cf..a61f35c61eb 100644 --- a/include/spl/sys/mutex.h +++ b/include/spl/sys/mutex.h @@ -127,6 +127,8 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ }) /* END CSTYLED */ +#define NESTED_SINGLE 1 + #ifdef CONFIG_DEBUG_LOCK_ALLOC #define mutex_enter_nested(mp, subclass) \ { \ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index e3fa2e61bdc..598b86a7a65 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -257,6 +257,8 @@ extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); +#define NESTED_SINGLE 1 +#define mutex_enter_nested(mp, class) mutex_enter(mp) /* * RW locks */ diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 07e616f6f0d..94c49b3ef0a 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2591,7 +2591,8 @@ dbuf_destroy(dmu_buf_impl_t *db) if (db->db_blkid != DMU_BONUS_BLKID) { boolean_t needlock = !MUTEX_HELD(&dn->dn_dbufs_mtx); if (needlock) - mutex_enter(&dn->dn_dbufs_mtx); + mutex_enter_nested(&dn->dn_dbufs_mtx, + NESTED_SINGLE); avl_remove(&dn->dn_dbufs, db); atomic_dec_32(&dn->dn_dbufs_count); membar_producer(); From 54561073e7f6e258f6c9e96be60821d51db2ac34 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Jul 2019 13:27:24 -0700 Subject: [PATCH 109/325] Linux 5.3 compat: rw_semaphore owner Commit https://github.com/torvalds/linux/commit/94a9717b updated the rwsem's owner field to contain additional flags describing the rwsem's state. Rather then update the wrappers to mask out these bits, the code no longer relies on the owner stored by the kernel. This does increase the size of a krwlock_t but it makes the implementation less sensitive to future kernel changes. 
Reviewed-by: Tony Hutter Reviewed-by: Tomohiro Kusumi Signed-off-by: Brian Behlendorf Closes #9029 --- include/spl/sys/rwlock.h | 68 +++------------------------------------- module/spl/spl-rwlock.c | 3 -- 2 files changed, 5 insertions(+), 66 deletions(-) diff --git a/include/spl/sys/rwlock.h b/include/spl/sys/rwlock.h index 408defac20d..5e052b532a4 100644 --- a/include/spl/sys/rwlock.h +++ b/include/spl/sys/rwlock.h @@ -78,15 +78,9 @@ typedef enum { RW_READER = 2 } krw_t; -/* - * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner - * field, so we don't need our own. - */ typedef struct { struct rw_semaphore rw_rwlock; -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER kthread_t *rw_owner; -#endif #ifdef CONFIG_LOCKDEP krw_type_t rw_type; #endif /* CONFIG_LOCKDEP */ @@ -97,31 +91,19 @@ typedef struct { static inline void spl_rw_set_owner(krwlock_t *rwp) { -/* - * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write, - * downgrade_write and __init_rwsem will set/clear owner for us. 
- */ -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER rwp->rw_owner = current; -#endif } static inline void spl_rw_clear_owner(krwlock_t *rwp) { -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER rwp->rw_owner = NULL; -#endif } static inline kthread_t * rw_owner(krwlock_t *rwp) { -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - return (SEM(rwp)->owner); -#else return (rwp->rw_owner); -#endif } #ifdef CONFIG_LOCKDEP @@ -148,62 +130,22 @@ spl_rw_lockdep_on_maybe(krwlock_t *rwp) \ #define spl_rw_lockdep_on_maybe(rwp) #endif /* CONFIG_LOCKDEP */ - static inline int -RW_WRITE_HELD(krwlock_t *rwp) +RW_LOCK_HELD(krwlock_t *rwp) { - return (rw_owner(rwp) == current); + return (spl_rwsem_is_locked(SEM(rwp))); } static inline int -RW_LOCK_HELD(krwlock_t *rwp) +RW_WRITE_HELD(krwlock_t *rwp) { - return (spl_rwsem_is_locked(SEM(rwp))); + return (rw_owner(rwp) == current); } static inline int RW_READ_HELD(krwlock_t *rwp) { - if (!RW_LOCK_HELD(rwp)) - return (0); - - /* - * rw_semaphore cheat sheet: - * - * < 3.16: - * There's no rw_semaphore.owner, so use rwp.owner instead. - * If rwp.owner == NULL then it's a reader - * - * 3.16 - 4.7: - * rw_semaphore.owner added (https://lwn.net/Articles/596656/) - * and CONFIG_RWSEM_SPIN_ON_OWNER introduced. - * If rw_semaphore.owner == NULL then it's a reader - * - * 4.8 - 4.16.16: - * RWSEM_READER_OWNED added as an internal #define. - * (https://lore.kernel.org/patchwork/patch/678590/) - * If rw_semaphore.owner == 1 then it's a reader - * - * 4.16.17 - 4.19: - * RWSEM_OWNER_UNKNOWN introduced as ((struct task_struct *)-1L) - * (https://do-db2.lkml.org/lkml/2018/5/15/985) - * If rw_semaphore.owner == 1 then it's a reader. - * - * 4.20+: - * RWSEM_OWNER_UNKNOWN changed to ((struct task_struct *)-2L) - * (https://lkml.org/lkml/2018/9/6/986) - * If rw_semaphore.owner & 1 then it's a reader, and also the reader's - * task_struct may be embedded in rw_semaphore->owner. 
- */ -#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && defined(RWSEM_OWNER_UNKNOWN) - if (RWSEM_OWNER_UNKNOWN == (struct task_struct *)-2L) { - /* 4.20+ kernels with CONFIG_RWSEM_SPIN_ON_OWNER */ - return ((unsigned long) SEM(rwp)->owner & 1); - } -#endif - - /* < 4.20 kernel or !CONFIG_RWSEM_SPIN_ON_OWNER */ - return (rw_owner(rwp) == NULL || (unsigned long) rw_owner(rwp) == 1); + return (RW_LOCK_HELD(rwp) && rw_owner(rwp) == NULL); } /* diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c index 86727ed1957..886e16924e6 100644 --- a/module/spl/spl-rwlock.c +++ b/module/spl/spl-rwlock.c @@ -119,9 +119,6 @@ rwsem_tryupgrade(struct rw_semaphore *rwsem) if (__rwsem_tryupgrade(rwsem)) { rwsem_release(&rwsem->dep_map, 1, _RET_IP_); rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - rwsem->owner = current; -#endif return (1); } return (0); From 3982d959c5b8577993740c03392c4efa750c0479 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Jul 2019 14:06:36 -0700 Subject: [PATCH 110/325] Linux 5.3 compat: retire rw_tryupgrade() The Linux kernel's rwsem's have never provided an interface to allow a reader to be upgraded to a writer. Historically, this functionality has been implemented by a SPL wrapper function. However, this approach depends on internal knowledge of the rw_semaphore and is therefore rather brittle. Since the ZFS code must always be able to fallback to rw_exit() and rw_enter() when an rw_tryupgrade() fails; this functionality isn't critical. Furthermore, the only potentially performance sensitive consumer is dmu_zfetch() and no decrease in performance was observed with this change applied. See the PR comments for additional testing details. Therefore, it is being retired to make the build more robust and to simplify the rwlock implementation. 
Reviewed-by: Tony Hutter Reviewed-by: Tomohiro Kusumi Signed-off-by: Brian Behlendorf Closes #9029 --- include/spl/sys/rwlock.h | 60 +++-------------------- module/spl/spl-rwlock.c | 101 --------------------------------------- 2 files changed, 7 insertions(+), 154 deletions(-) diff --git a/include/spl/sys/rwlock.h b/include/spl/sys/rwlock.h index 5e052b532a4..89e02fa8f04 100644 --- a/include/spl/sys/rwlock.h +++ b/include/spl/sys/rwlock.h @@ -29,43 +29,6 @@ #include #include -/* Linux kernel compatibility */ -#if defined(CONFIG_PREEMPT_RT_FULL) -#define SPL_RWSEM_SINGLE_READER_VALUE (1) -#define SPL_RWSEM_SINGLE_WRITER_VALUE (0) -#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -#define SPL_RWSEM_SINGLE_READER_VALUE (1) -#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1) -#elif defined(RWSEM_ACTIVE_MASK) -#define SPL_RWSEM_SINGLE_READER_VALUE (RWSEM_ACTIVE_READ_BIAS) -#define SPL_RWSEM_SINGLE_WRITER_VALUE (RWSEM_ACTIVE_WRITE_BIAS) -#endif - -/* Linux 3.16 changed activity to count for rwsem-spinlock */ -#if defined(CONFIG_PREEMPT_RT_FULL) -#define RWSEM_COUNT(sem) sem->read_depth -#elif defined(HAVE_RWSEM_ACTIVITY) -#define RWSEM_COUNT(sem) sem->activity -/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */ -#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) -#define RWSEM_COUNT(sem) atomic_long_read(&(sem)->count) -#else -#define RWSEM_COUNT(sem) sem->count -#endif - -#if defined(RWSEM_SPINLOCK_IS_RAW) -#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl) -#define spl_rwsem_unlock_irqrestore(lk, fl) \ - raw_spin_unlock_irqrestore(lk, fl) -#define spl_rwsem_trylock_irqsave(lk, fl) raw_spin_trylock_irqsave(lk, fl) -#else -#define spl_rwsem_lock_irqsave(lk, fl) spin_lock_irqsave(lk, fl) -#define spl_rwsem_unlock_irqrestore(lk, fl) spin_unlock_irqrestore(lk, fl) -#define spl_rwsem_trylock_irqsave(lk, fl) spin_trylock_irqsave(lk, fl) -#endif /* RWSEM_SPINLOCK_IS_RAW */ - -#define spl_rwsem_is_locked(rwsem) rwsem_is_locked(rwsem) - typedef enum { RW_DRIVER 
= 2, RW_DEFAULT = 4, @@ -133,7 +96,7 @@ spl_rw_lockdep_on_maybe(krwlock_t *rwp) \ static inline int RW_LOCK_HELD(krwlock_t *rwp) { - return (spl_rwsem_is_locked(SEM(rwp))); + return (rwsem_is_locked(SEM(rwp))); } static inline int @@ -170,6 +133,12 @@ RW_READ_HELD(krwlock_t *rwp) */ #define rw_destroy(rwp) ((void) 0) +/* + * Upgrading a rwsem from a reader to a writer is not supported by the + * Linux kernel. The lock must be dropped and reacquired as a writer. + */ +#define rw_tryupgrade(rwp) RW_WRITE_HELD(rwp) + #define rw_tryenter(rwp, rw) \ ({ \ int _rc_ = 0; \ @@ -228,24 +197,9 @@ RW_READ_HELD(krwlock_t *rwp) spl_rw_lockdep_on_maybe(rwp); \ }) -#define rw_tryupgrade(rwp) \ -({ \ - int _rc_ = 0; \ - \ - if (RW_WRITE_HELD(rwp)) { \ - _rc_ = 1; \ - } else { \ - spl_rw_lockdep_off_maybe(rwp); \ - if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \ - spl_rw_set_owner(rwp); \ - spl_rw_lockdep_on_maybe(rwp); \ - } \ - _rc_; \ -}) /* END CSTYLED */ int spl_rw_init(void); void spl_rw_fini(void); -int rwsem_tryupgrade(struct rw_semaphore *rwsem); #endif /* _SPL_RWLOCK_H */ diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c index 886e16924e6..10f7c38db4e 100644 --- a/module/spl/spl-rwlock.c +++ b/module/spl/spl-rwlock.c @@ -24,106 +24,5 @@ * Solaris Porting Layer (SPL) Reader/Writer Lock Implementation. */ -#include -#include - -#if defined(CONFIG_PREEMPT_RT_FULL) - -#include -#define RT_MUTEX_OWNER_MASKALL 1UL - -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ -#if defined(READER_BIAS) && defined(WRITER_BIAS) - /* - * After the 4.9.20-rt16 kernel the realtime patch series lifted the - * single reader restriction. While this could be accommodated by - * adding additional compatibility code assume the rwsem can never - * be upgraded. All caller must already cleanly handle this case. 
- */ - return (0); -#else - ASSERT((struct task_struct *) - ((unsigned long)rwsem->lock.owner & ~RT_MUTEX_OWNER_MASKALL) == - current); - - /* - * Prior to 4.9.20-rt16 kernel the realtime patch series, rwsem is - * implemented as a single mutex held by readers and writers alike. - * However, this implementation would prevent a thread from taking - * a read lock twice, as the mutex would already be locked on - * the second attempt. Therefore the implementation allows a - * single thread to take a rwsem as read lock multiple times - * tracking that nesting as read_depth counter. - */ - if (rwsem->read_depth <= 1) { - /* - * In case, the current thread has not taken the lock - * more than once as read lock, we can allow an - * upgrade to a write lock. rwsem_rt.h implements - * write locks as read_depth == 0. - */ - rwsem->read_depth = 0; - return (1); - } - return (0); -#endif -} -#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - int ret = 0; - unsigned long flags; - spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags); - if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE && - list_empty(&rwsem->wait_list)) { - ret = 1; - RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE; - } - spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags); - return (ret); -} -#elif defined(RWSEM_ACTIVE_MASK) -#if defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - long val; - val = atomic_long_cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, - SPL_RWSEM_SINGLE_WRITER_VALUE); - return (val == SPL_RWSEM_SINGLE_READER_VALUE); -} -#else -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - typeof(rwsem->count) val; - val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, - SPL_RWSEM_SINGLE_WRITER_VALUE); - return (val == SPL_RWSEM_SINGLE_READER_VALUE); -} -#endif -#else -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - return (0); -} -#endif - -int 
-rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - if (__rwsem_tryupgrade(rwsem)) { - rwsem_release(&rwsem->dep_map, 1, _RET_IP_); - rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); - return (1); - } - return (0); -} -EXPORT_SYMBOL(rwsem_tryupgrade); - int spl_rw_init(void) { return 0; } void spl_rw_fini(void) { } From 428a63cc62c31056b602e80ec072d8093ca049c8 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Jul 2019 14:40:15 -0700 Subject: [PATCH 111/325] Retire unused spl_{mutex,rwlock}_{init_fini} These functions are unused and can be removed along with the spl-mutex.c and spl-rwlock.c source files. Reviewed-by: Tony Hutter Reviewed-by: Tomohiro Kusumi Signed-off-by: Brian Behlendorf Closes #9029 --- include/spl/sys/mutex.h | 3 --- include/spl/sys/rwlock.h | 4 ---- module/spl/Makefile.in | 2 -- module/spl/spl-generic.c | 38 +++++++++++++------------------------- module/spl/spl-mutex.c | 30 ------------------------------ module/spl/spl-rwlock.c | 28 ---------------------------- 6 files changed, 13 insertions(+), 92 deletions(-) delete mode 100644 module/spl/spl-mutex.c delete mode 100644 module/spl/spl-rwlock.c diff --git a/include/spl/sys/mutex.h b/include/spl/sys/mutex.h index a61f35c61eb..73da2368559 100644 --- a/include/spl/sys/mutex.h +++ b/include/spl/sys/mutex.h @@ -181,7 +181,4 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ /* NOTE: do not dereference mp after this point */ \ } -int spl_mutex_init(void); -void spl_mutex_fini(void); - #endif /* _SPL_MUTEX_H */ diff --git a/include/spl/sys/rwlock.h b/include/spl/sys/rwlock.h index 89e02fa8f04..60f5bfd986b 100644 --- a/include/spl/sys/rwlock.h +++ b/include/spl/sys/rwlock.h @@ -196,10 +196,6 @@ RW_READ_HELD(krwlock_t *rwp) downgrade_write(SEM(rwp)); \ spl_rw_lockdep_on_maybe(rwp); \ }) - /* END CSTYLED */ -int spl_rw_init(void); -void spl_rw_fini(void); - #endif /* _SPL_RWLOCK_H */ diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in index 3bcbf63cbc6..e16666aa94f 100644 --- 
a/module/spl/Makefile.in +++ b/module/spl/Makefile.in @@ -16,10 +16,8 @@ $(MODULE)-objs += spl-kmem.o $(MODULE)-objs += spl-kmem-cache.o $(MODULE)-objs += spl-kobj.o $(MODULE)-objs += spl-kstat.o -$(MODULE)-objs += spl-mutex.o $(MODULE)-objs += spl-proc.o $(MODULE)-objs += spl-procfs-list.o -$(MODULE)-objs += spl-rwlock.o $(MODULE)-objs += spl-taskq.o $(MODULE)-objs += spl-thread.o $(MODULE)-objs += spl-tsd.o diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index cd2fa202051..3c5ef60bd1a 100644 --- a/module/spl/spl-generic.c +++ b/module/spl/spl-generic.c @@ -694,51 +694,41 @@ spl_init(void) if ((rc = spl_kvmem_init())) goto out1; - if ((rc = spl_mutex_init())) - goto out2; - - if ((rc = spl_rw_init())) - goto out3; - if ((rc = spl_tsd_init())) - goto out4; + goto out2; if ((rc = spl_taskq_init())) - goto out5; + goto out3; if ((rc = spl_kmem_cache_init())) - goto out6; + goto out4; if ((rc = spl_vn_init())) - goto out7; + goto out5; if ((rc = spl_proc_init())) - goto out8; + goto out6; if ((rc = spl_kstat_init())) - goto out9; + goto out7; if ((rc = spl_zlib_init())) - goto out10; + goto out8; return (rc); -out10: - spl_kstat_fini(); -out9: - spl_proc_fini(); out8: - spl_vn_fini(); + spl_kstat_fini(); out7: - spl_kmem_cache_fini(); + spl_proc_fini(); out6: - spl_taskq_fini(); + spl_vn_fini(); out5: - spl_tsd_fini(); + spl_kmem_cache_fini(); out4: - spl_rw_fini(); + spl_taskq_fini(); out3: - spl_mutex_fini(); + spl_tsd_fini(); out2: spl_kvmem_fini(); out1: @@ -755,8 +745,6 @@ spl_fini(void) spl_kmem_cache_fini(); spl_taskq_fini(); spl_tsd_fini(); - spl_rw_fini(); - spl_mutex_fini(); spl_kvmem_fini(); } diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c deleted file mode 100644 index ba818862b67..00000000000 --- a/module/spl/spl-mutex.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. 
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Mutex Implementation. - */ - -#include - -int spl_mutex_init(void) { return 0; } -void spl_mutex_fini(void) { } diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c deleted file mode 100644 index 10f7c38db4e..00000000000 --- a/module/spl/spl-rwlock.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Reader/Writer Lock Implementation. - */ - -int spl_rw_init(void) { return 0; } -void spl_rw_fini(void) { } From 3c144b92671df9c6e9d926e6c19a34893645500e Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Fri, 19 Jul 2019 04:48:46 +0900 Subject: [PATCH 112/325] Fix wrong comment on zcr_blksz_{min,max} These aren't tunable; illumos has this comment fixed in "3742 zfs comments need cleaner, more consistent style", so sync with that. Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #9052 --- module/zfs/zfs_vnops.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 9d8a9cbc541..4f07111f25e 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -5074,13 +5074,14 @@ zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) #ifdef HAVE_UIO_ZEROCOPY /* - * Tunable, both must be a power of 2. - * - * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf - * zcr_blksz_max: if set to less than the file block size, allow loaning out of - * an arcbuf for a partial block read + * The smallest read we may consider to loan out an arcbuf. + * This must be a power of 2. */ int zcr_blksz_min = (1 << 10); /* 1K */ +/* + * If set to less than the file block size, allow loaning out of an + * arcbuf for a partial block read. This must be a power of 2. + */ int zcr_blksz_max = (1 << 17); /* 128K */ /*ARGSUSED*/ From bbbe4b0a9885fb671186da86b63c09f262852c65 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 18 Jul 2019 12:55:29 -0700 Subject: [PATCH 113/325] hdr_recl calls zthr_wakeup() on destroyed zthr There exists a race condition were hdr_recl() calls zthr_wakeup() on a destroyed zthr. 
The timeline is the following: [1] hdr_recl() runs first and goes intro zthr_wakeup() because arc_initialized is set. [2] arc_fini() is called by another thread, zeroes that flag, destroying the zthr, and goes into buf_init(). [3] hdr_recl() tries to enter the destroyed mutex and we blow up. This patch ensures that the ARC's zthrs are not offloaded any new work once arc_initialized is set and then destroys them after all of the ARC state has been deleted. Reviewed by: Matt Ahrens Reviewed by: Brian Behlendorf Signed-off-by: Serapheim Dimitropoulos Closes #9047 --- module/zfs/arc.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index f125ca6a4d1..53a44bdaf44 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Joyent, Inc. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 
*/ @@ -5079,6 +5079,9 @@ arc_kmem_reap_soon(void) static boolean_t arc_adjust_cb_check(void *arg, zthr_t *zthr) { + if (!arc_initialized) + return (B_FALSE); + /* * This is necessary so that any changes which may have been made to * many of the zfs_arc_* module parameters will be propagated to @@ -5166,6 +5169,9 @@ arc_adjust_cb(void *arg, zthr_t *zthr) static boolean_t arc_reap_cb_check(void *arg, zthr_t *zthr) { + if (!arc_initialized) + return (B_FALSE); + int64_t free_memory = arc_available_memory(); /* @@ -7924,11 +7930,9 @@ arc_fini(void) list_destroy(&arc_prune_list); mutex_destroy(&arc_prune_mtx); - (void) zthr_cancel(arc_adjust_zthr); - zthr_destroy(arc_adjust_zthr); + (void) zthr_cancel(arc_adjust_zthr); (void) zthr_cancel(arc_reap_zthr); - zthr_destroy(arc_reap_zthr); mutex_destroy(&arc_adjust_lock); cv_destroy(&arc_adjust_waiters_cv); @@ -7941,6 +7945,14 @@ arc_fini(void) buf_fini(); arc_state_fini(); + /* + * We destroy the zthrs after all the ARC state has been + * torn down to avoid the case of them receiving any + * wakeup() signals after they are destroyed. + */ + zthr_destroy(arc_adjust_zthr); + zthr_destroy(arc_reap_zthr); + ASSERT0(arc_loaned_bytes); } From 1c4b0fc7457d6c6dac801f4a4a694ffe954bb91f Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 18 Jul 2019 13:02:33 -0700 Subject: [PATCH 114/325] Race condition between spa async threads and export In the past we've seen multiple race conditions that have to do with open-context threads async threads and concurrent calls to spa_export()/spa_destroy() (including the one referenced in issue #9015). This patch ensures that only one thread can execute the main body of spa_export_common() at a time, with subsequent threads returning with a new error code created just for this situation, eliminating this way any race condition bugs introduced by concurrent calls to this function. 
Reviewed by: Matt Ahrens Reviewed by: Brian Behlendorf Signed-off-by: Serapheim Dimitropoulos Closes #9015 Closes #9044 --- cmd/ztest/ztest.c | 18 +++++++++++++++++- include/libzfs.h | 1 + include/sys/fs/zfs.h | 1 + include/sys/spa_impl.h | 1 + lib/libzfs/libzfs_util.c | 5 +++++ module/zfs/spa.c | 18 +++++++++++++++++- 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 9c2cf950183..3bf840d88ed 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2745,8 +2745,24 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); + + /* + * We open a reference to the spa and then we try to export it + * expecting one of the following errors: + * + * EBUSY + * Because of the reference we just opened. + * + * ZFS_ERR_EXPORT_IN_PROGRESS + * For the case that there is another ztest thread doing + * an export concurrently. + */ VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); - VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); + int error = spa_destroy(zo->zo_pool); + if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) { + fatal(0, "spa_destroy(%s) returned unexpected value %d", + spa->spa_name, error); + } spa_close(spa, FTAG); (void) pthread_rwlock_unlock(&ztest_name_lock); diff --git a/include/libzfs.h b/include/libzfs.h index e2ec2d9bce7..a5b2a8393f4 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -147,6 +147,7 @@ typedef enum zfs_error { EZFS_NO_TRIM, /* no active trim */ EZFS_TRIM_NOTSUP, /* device does not support trim */ EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ + EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_UNKNOWN } zfs_error_t; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 3bcefdbfd77..c167a594a7d 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1318,6 +1318,7 @@ typedef enum { ZFS_ERR_FROM_IVSET_GUID_MISSING, 
ZFS_ERR_FROM_IVSET_GUID_MISMATCH, ZFS_ERR_SPILL_BLOCK_FLAG_MISSING, + ZFS_ERR_EXPORT_IN_PROGRESS, } zfs_errno_t; /* diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 66032d9aad7..0de8613d3eb 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -219,6 +219,7 @@ struct spa { spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; dsl_pool_t *spa_dsl_pool; boolean_t spa_is_initializing; /* true while opening pool */ + boolean_t spa_is_exporting; /* true while exporting pool */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ metaslab_class_t *spa_special_class; /* special allocation class */ diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 19bb57ad437..dc2d68ebebb 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -303,6 +303,8 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_NO_RESILVER_DEFER: return (dgettext(TEXT_DOMAIN, "this action requires the " "resilver_defer feature")); + case EZFS_EXPORT_IN_PROGRESS: + return (dgettext(TEXT_DOMAIN, "pool export in progress")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -598,6 +600,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_VDEV_TOO_BIG: zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap); break; + case ZFS_ERR_EXPORT_IN_PROGRESS: + zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap); + break; case ZFS_ERR_IOC_CMD_UNAVAIL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " "module does not support this operation. 
A reboot may " diff --git a/module/zfs/spa.c b/module/zfs/spa.c index eb3ff91a073..ce622cee88b 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5722,6 +5722,13 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, return (SET_ERROR(ENOENT)); } + if (spa->spa_is_exporting) { + /* the pool is being exported by another thread */ + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS)); + } + spa->spa_is_exporting = B_TRUE; + /* * Put a hold on the pool, drop the namespace lock, stop async tasks, * reacquire the namespace lock, and see if we can export. @@ -5757,6 +5764,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, (spa->spa_inject_ref != 0 && new_state != POOL_STATE_UNINITIALIZED)) { spa_async_resume(spa); + spa->spa_is_exporting = B_FALSE; mutex_exit(&spa_namespace_lock); return (SET_ERROR(EBUSY)); } @@ -5771,6 +5779,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, if (!force && new_state == POOL_STATE_EXPORTED && spa_has_active_shared_spare(spa)) { spa_async_resume(spa); + spa->spa_is_exporting = B_FALSE; mutex_exit(&spa_namespace_lock); return (SET_ERROR(EXDEV)); } @@ -5822,9 +5831,16 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, if (!hardforce) spa_write_cachefile(spa, B_TRUE, B_TRUE); spa_remove(spa); + } else { + /* + * If spa_remove() is not called for this spa_t and + * there is any possibility that it can be reused, + * we make sure to reset the exporting flag. + */ + spa->spa_is_exporting = B_FALSE; } - mutex_exit(&spa_namespace_lock); + mutex_exit(&spa_namespace_lock); return (0); } From be068aeea86433481c1bc18cf1a76ed033daea2e Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 19 Jul 2019 11:21:54 -0700 Subject: [PATCH 115/325] Move some tests to cli_user/zpool_status The tests in tests/functional/cli_root/zpool_status should all require root. 
However, linux.run has "user =" specified for those tests, which means they run as a normal user. When I removed that line to run them as root, the following tests did not pass: zpool_status_003_pos zpool_status_-c_disable zpool_status_-c_homedir zpool_status_-c_searchpath These tests need to be run as a normal user. To fix this, move these tests to a new tests/functional/cli_user/zpool_status directory. Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Reviewed-by: Giuseppe Di Natale Signed-off-by: Tony Hutter Closes #9057 --- configure.ac | 1 + tests/runfiles/linux.run | 11 ++++--- .../cli_root/zpool_status/Makefile.am | 6 +--- .../tests/functional/cli_user/Makefile.am | 3 +- .../cli_user/zpool_status/Makefile.am | 8 +++++ .../cli_user/zpool_status/cleanup.ksh | 30 +++++++++++++++++ .../cli_user/zpool_status/setup.ksh | 32 +++++++++++++++++++ .../zpool_status/zpool_status_-c_disable.ksh | 0 .../zpool_status/zpool_status_-c_homedir.ksh | 0 .../zpool_status_-c_searchpath.ksh | 0 .../zpool_status/zpool_status_003_pos.ksh | 0 11 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am create mode 100755 tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh create mode 100755 tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh rename tests/zfs-tests/tests/functional/{cli_root => cli_user}/zpool_status/zpool_status_-c_disable.ksh (100%) rename tests/zfs-tests/tests/functional/{cli_root => cli_user}/zpool_status/zpool_status_-c_homedir.ksh (100%) rename tests/zfs-tests/tests/functional/{cli_root => cli_user}/zpool_status/zpool_status_-c_searchpath.ksh (100%) rename tests/zfs-tests/tests/functional/{cli_root => cli_user}/zpool_status/zpool_status_003_pos.ksh (100%) diff --git a/configure.ac b/configure.ac index ea2e355c70b..cf1d8b394ad 100644 --- a/configure.ac +++ b/configure.ac @@ -272,6 +272,7 @@ AC_CONFIG_FILES([ 
tests/zfs-tests/tests/functional/cli_user/zfs_list/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile + tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile tests/zfs-tests/tests/functional/compression/Makefile tests/zfs-tests/tests/functional/cp_files/Makefile tests/zfs-tests/tests/functional/ctime/Makefile diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 27e36b594ab..c08bc4e31a3 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -462,10 +462,7 @@ tests = ['zpool_split_cliargs', 'zpool_split_devices', tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] -tests = ['zpool_status_001_pos', 'zpool_status_002_pos','zpool_status_003_pos', - 'zpool_status_-c_disable', 'zpool_status_-c_homedir', - 'zpool_status_-c_searchpath'] -user = +tests = ['zpool_status_001_pos', 'zpool_status_002_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] @@ -529,6 +526,12 @@ tests = ['zpool_list_001_pos', 'zpool_list_002_neg'] user = tags = ['functional', 'cli_user', 'zpool_list'] +[tests/functional/cli_user/zpool_status] +tests = ['zpool_status_003_pos', 'zpool_status_-c_disable', + 'zpool_status_-c_homedir', 'zpool_status_-c_searchpath'] +user = +tags = ['functional', 'cli_user', 'zpool_status'] + [tests/functional/compression] tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', 'compress_004_pos'] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am index aab4de0e7c8..beb59e3d066 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am @@ -3,8 +3,4 @@ dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ zpool_status_001_pos.ksh \ - zpool_status_002_pos.ksh \ - 
zpool_status_003_pos.ksh \ - zpool_status_-c_disable.ksh \ - zpool_status_-c_homedir.ksh \ - zpool_status_-c_searchpath.ksh + zpool_status_002_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_user/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/Makefile.am index f1ff32e8d22..119f8ee187f 100644 --- a/tests/zfs-tests/tests/functional/cli_user/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_user/Makefile.am @@ -2,4 +2,5 @@ SUBDIRS = \ misc \ zfs_list \ zpool_iostat \ - zpool_list + zpool_list \ + zpool_status diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am new file mode 100644 index 00000000000..e1b33965774 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_user/zpool_status +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zpool_status_003_pos.ksh \ + zpool_status_-c_disable.ksh \ + zpool_status_-c_homedir.ksh \ + zpool_status_-c_searchpath.ksh diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh new file mode 100755 index 00000000000..79cd6e9f908 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh new file mode 100755 index 00000000000..6a9af3bc28c --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_disable.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_disable.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_disable.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_disable.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_homedir.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_homedir.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_searchpath.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_searchpath.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh From 65a0b28b42976a23c354f0518e0e1cc02b943b46 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sat, 20 Jul 2019 03:23:56 +0900 Subject: [PATCH 116/325] Fix module_param() type for zfs_read_chunk_size zfs_read_chunk_size is unsigned long. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #9051 --- module/zfs/zfs_vnops.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 4f07111f25e..2a49293c245 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -5260,9 +5260,11 @@ EXPORT_SYMBOL(zfs_putpage); EXPORT_SYMBOL(zfs_dirty_inode); EXPORT_SYMBOL(zfs_map); -/* CSTYLED */ +/* BEGIN CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); -module_param(zfs_read_chunk_size, long, 0644); +module_param(zfs_read_chunk_size, ulong, 0644); MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); +/* END CSTYLED */ + #endif From 4f951b183c645f320ad375bb41b319634370e3ac Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Fri, 26 Jul 2019 03:59:20 +0900 Subject: [PATCH 117/325] Don't directly cast unsigned long to void* Cast to uintptr_t first for portability on integer to/from pointer conversion. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #9065 --- module/zfs/zfs_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index c6b55d24f7e..152433d6079 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -7110,7 +7110,8 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); - error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); + error = ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), + flag); if (error != 0) { error = SET_ERROR(EFAULT); goto out; @@ -7269,7 +7270,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) out: nvlist_free(innvl); - rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); + rc = ddi_copyout(zc, (void *)(uintptr_t)arg, sizeof (zfs_cmd_t), flag); if (error == 0 && rc != 0) error = SET_ERROR(EFAULT); if (error == 0 && vec->zvec_allow_log) { From 1f5979d23f4b06b3d8ebc58b7d7e3946393fa9ce Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Fri, 26 Jul 2019 12:07:48 -0700 Subject: [PATCH 118/325] zed crashes when devid not present zed core dumps due to a NULL pointer in zfs_agent_iter_vdev(). The gs_devid is NULL, but the nvl has a "devid" entry. zfs_agent_post_event() checks that ZFS_EV_VDEV_GUID or DEV_IDENTIFIER is present in nvl, but then later it and zfs_agent_iter_vdev() assume that DEV_IDENTIFIER is present and thus gs_devid is set. Typically this is not a problem because usually either all vdevs have devid's, or none of them do. Since zfs_agent_iter_vdev() first checks if the vdev has devid before dereferencing gs_devid, the problem isn't typically encountered. However, if some vdevs have devid's and some do not, then the problem is easily reproduced. This can happen if the pool has been moved from a system that has devid's to one that does not. 
The fix is for zfs_agent_iter_vdev() to only try to match the devid's if both nvl and gsp have devid's present. Reviewed-by: Prashanth Sreenivasa Reviewed-by: Don Brady Reviewed-by: Brian Behlendorf Reviewed-by: loli10K Signed-off-by: Matthew Ahrens External-issue: DLPX-65090 Closes #9054 Closes #9060 --- cmd/zed/agents/zfs_agents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 6d392604bce..006e0ab99f4 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -116,7 +116,8 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) /* * On a devid match, grab the vdev guid and expansion time, if any. */ - if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && + if (gsp->gs_devid != NULL && + (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && (strcmp(gsp->gs_devid, path) == 0)) { (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &gsp->gs_vdev_guid); From 6c68594675ed3fdc1d663da47eaeb27c3db97f29 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sat, 27 Jul 2019 05:52:30 +0900 Subject: [PATCH 119/325] Implement secpolicy_vnode_setid_retain() Don't unconditionally return 0 (i.e. retain SUID/SGID). Test CAP_FSETID capability. https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t which expects SUID/SGID to be dropped on write(2) by non-owner fails without this. Most filesystems make this decision within VFS by using a generic file write for fops. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #9035 Closes #9043 --- configure.ac | 1 + module/zfs/policy.c | 2 +- tests/runfiles/linux.run | 5 + tests/zfs-tests/tests/functional/Makefile.am | 1 + .../tests/functional/suid/.gitignore | 1 + .../tests/functional/suid/Makefile.am | 16 +++ .../tests/functional/suid/cleanup.ksh | 34 +++++ .../zfs-tests/tests/functional/suid/setup.ksh | 35 +++++ .../functional/suid/suid_write_to_file.c | 133 ++++++++++++++++++ .../functional/suid/suid_write_to_none.ksh | 52 +++++++ .../functional/suid/suid_write_to_sgid.ksh | 52 +++++++ .../functional/suid/suid_write_to_suid.ksh | 52 +++++++ .../suid/suid_write_to_suid_sgid.ksh | 52 +++++++ 13 files changed, 435 insertions(+), 1 deletion(-) create mode 100644 tests/zfs-tests/tests/functional/suid/.gitignore create mode 100644 tests/zfs-tests/tests/functional/suid/Makefile.am create mode 100755 tests/zfs-tests/tests/functional/suid/cleanup.ksh create mode 100755 tests/zfs-tests/tests/functional/suid/setup.ksh create mode 100644 tests/zfs-tests/tests/functional/suid/suid_write_to_file.c create mode 100755 tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh create mode 100755 tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh create mode 100755 tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh create mode 100755 tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh diff --git a/configure.ac b/configure.ac index cf1d8b394ad..e8592ffb1d2 100644 --- a/configure.ac +++ b/configure.ac @@ -328,6 +328,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/snapshot/Makefile tests/zfs-tests/tests/functional/snapused/Makefile tests/zfs-tests/tests/functional/sparse/Makefile + tests/zfs-tests/tests/functional/suid/Makefile tests/zfs-tests/tests/functional/alloc_class/Makefile tests/zfs-tests/tests/functional/threadsappend/Makefile tests/zfs-tests/tests/functional/tmpfile/Makefile diff --git a/module/zfs/policy.c b/module/zfs/policy.c 
index 55c93274791..a723235d301 100644 --- a/module/zfs/policy.c +++ b/module/zfs/policy.c @@ -209,7 +209,7 @@ secpolicy_vnode_setdac(const cred_t *cr, uid_t owner) int secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot) { - return (0); + return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); } /* diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index c08bc4e31a3..1c368d20c45 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -847,6 +847,11 @@ tags = ['functional', 'snapused'] tests = ['sparse_001_pos'] tags = ['functional', 'sparse'] +[tests/functional/suid] +tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid', + 'suid_write_to_none'] +tags = ['functional', 'suid'] + [tests/functional/threadsappend] tests = ['threadsappend_001_pos'] tags = ['functional', 'threadsappend'] diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index da27673ec94..ac0ba7cf3d1 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -66,6 +66,7 @@ SUBDIRS = \ snapshot \ snapused \ sparse \ + suid \ threadsappend \ tmpfile \ trim \ diff --git a/tests/zfs-tests/tests/functional/suid/.gitignore b/tests/zfs-tests/tests/functional/suid/.gitignore new file mode 100644 index 00000000000..a9a3db79ba4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/.gitignore @@ -0,0 +1 @@ +/suid_write_to_file diff --git a/tests/zfs-tests/tests/functional/suid/Makefile.am b/tests/zfs-tests/tests/functional/suid/Makefile.am new file mode 100644 index 00000000000..594d2b77ca8 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/Makefile.am @@ -0,0 +1,16 @@ +include $(top_srcdir)/config/Rules.am + +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/suid + +dist_pkgdata_SCRIPTS = \ + suid_write_to_suid.ksh \ + suid_write_to_sgid.ksh \ + suid_write_to_suid_sgid.ksh \ + suid_write_to_none.ksh \ + 
cleanup.ksh \ + setup.ksh + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/suid + +pkgexec_PROGRAMS = suid_write_to_file +suid_write_to_file_SOURCES = suid_write_to_file.c diff --git a/tests/zfs-tests/tests/functional/suid/cleanup.ksh b/tests/zfs-tests/tests/functional/suid/cleanup.ksh new file mode 100755 index 00000000000..6e41e02faf5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/suid/setup.ksh b/tests/zfs-tests/tests/functional/suid/setup.ksh new file mode 100755 index 00000000000..d04d5568c00 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/setup.ksh @@ -0,0 +1,35 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c new file mode 100644 index 00000000000..571dc553bec --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c @@ -0,0 +1,133 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void +test_stat_mode(mode_t extra) +{ + struct stat st; + int i, fd; + char fpath[1024]; + char *penv[] = {"TESTDIR", "TESTFILE0"}; + char buf[] = "test"; + mode_t res; + mode_t mode = 0777 | extra; + + /* + * Get the environment variable values. + */ + for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { + if ((penv[i] = getenv(penv[i])) == NULL) { + fprintf(stderr, "getenv(penv[%d])\n", i); + exit(1); + } + } + + umask(0); + if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) { + perror("mkdir"); + exit(2); + } + + snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]); + unlink(fpath); + if (stat(fpath, &st) == 0) { + fprintf(stderr, "%s exists\n", fpath); + exit(3); + } + + fd = creat(fpath, mode); + if (fd == -1) { + perror("creat"); + exit(4); + } + close(fd); + + if (setuid(65534) == -1) { + perror("setuid"); + exit(5); + } + + fd = open(fpath, O_RDWR); + if (fd == -1) { + perror("open"); + exit(6); + } + + if (write(fd, buf, sizeof (buf)) == -1) { + perror("write"); + exit(7); + } + close(fd); + + if (stat(fpath, &st) == -1) { + perror("stat"); + exit(8); + } + unlink(fpath); + + /* Verify SUID/SGID are dropped */ + res = st.st_mode & (0777 | S_ISUID | S_ISGID); + if (res != (mode & 0777)) { + fprintf(stderr, "stat(2) %o\n", res); + exit(9); + } +} + +int +main(int argc, char *argv[]) +{ + const char *name; + mode_t extra; + + if (argc < 2) { + fprintf(stderr, "Invalid argc\n"); + exit(1); + } + + name = argv[1]; + if (strcmp(name, "SUID") == 0) { + extra = S_ISUID; + } else if (strcmp(name, "SGID") == 0) { + extra = S_ISGID; + } else if (strcmp(name, 
"SUID_SGID") == 0) { + extra = S_ISUID | S_ISGID; + } else if (strcmp(name, "NONE") == 0) { + extra = 0; + } else { + fprintf(stderr, "Invalid name %s\n", name); + exit(1); + } + + test_stat_mode(extra); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh new file mode 100755 index 00000000000..dd01978619f --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to regular file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. 
+# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to regular file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" + +log_pass "Verify write(2) to regular file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh new file mode 100755 index 00000000000..49ae2bd1b31 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to SGID file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file with SGID. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. 
+# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to SGID file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" + +log_pass "Verify write(2) to SGID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh new file mode 100755 index 00000000000..3983aad2e51 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to SUID file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file with SUID. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. 
+# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to SUID file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" + +log_pass "Verify write(2) to SUID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh new file mode 100755 index 00000000000..a058c7e7d4b --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to SUID/SGID file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file with SUID/SGID. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. 
+# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to SUID/SGID file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" + +log_pass "Verify write(2) to SUID/SGID file by non-owner passed" From a8c5bcb5de431a792287fd355b8599513ddf69c5 Mon Sep 17 00:00:00 2001 From: George Wilson Date: Sun, 28 Jul 2019 21:13:56 -0400 Subject: [PATCH 120/325] Race between zfs-share and zfs-mount services When a system boots the zfs-mount.service and the zfs-share.service can start simultaneously. What may be unclear is that sharing a filesystem will first mount the filesystem if it's not already mounted. This means that both service can race to mount the same fileystem. This race can result in a SEGFAULT or EBUSY conditions. This change explicitly defines the start ordering between the two services such that the zfs-mount.service is solely responsible for mounting filesystems eliminating the race between "zfs mount -a" and "zfs share -a" commands. 
Reviewed-by: Sebastien Roy Reviewed-by: Brian Behlendorf Signed-off-by: George Wilson Closes #9083 --- etc/systemd/system/zfs-share.service.in | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in index 75ff6e94676..5f4ba411b3c 100644 --- a/etc/systemd/system/zfs-share.service.in +++ b/etc/systemd/system/zfs-share.service.in @@ -5,6 +5,7 @@ After=nfs-server.service nfs-kernel-server.service After=smb.service Before=rpc-statd-notify.service Wants=zfs-mount.service +After=zfs-mount.service PartOf=nfs-server.service nfs-kernel-server.service PartOf=smb.service From 8c00159411ed891b91f8b4f3d4356c038ffa81ca Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Sun, 28 Jul 2019 18:15:26 -0700 Subject: [PATCH 121/325] Fix channel programs on s390x When adapting the original sources for s390x the JMP_BUF_CNT was mistakenly halved due to an incorrect assumption of the size of a unsigned long. They are 8 bytes for the s390x architecture. Increase JMP_BUF_CNT accordingly. Authored-by: Don Brady Reviewed-by: Brian Behlendorf Reported-by: Colin Ian King Tested-by: Colin Ian King Signed-off-by: Brian Behlendorf Closes #8992 Closes #9080 --- module/lua/ldo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/lua/ldo.c b/module/lua/ldo.c index aca02b23477..59d0b6a2c29 100644 --- a/module/lua/ldo.c +++ b/module/lua/ldo.c @@ -61,7 +61,7 @@ #elif defined(__mips__) #define JMP_BUF_CNT 12 #elif defined(__s390x__) -#define JMP_BUF_CNT 9 +#define JMP_BUF_CNT 18 #else #define JMP_BUF_CNT 1 #endif From 6c9882d5dbc6bcaf39ae2ca54860743c083fa940 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 30 Jul 2019 09:18:30 -0700 Subject: [PATCH 122/325] Improve performance by using dmu_tx_hold_*_by_dnode() In zfs_write() and dmu_tx_hold_sa(), we can use dmu_tx_hold_*_by_dnode() instead of dmu_tx_hold_*(), since we already have a dbuf from the target dnode in hand. 
This eliminates some calls to dnode_hold(), which can be expensive. This is especially impactful if several threads are accessing objects that are in the same block of dnodes, because they will contend for that dbuf's lock. We are seeing 10-20% performance wins for the sequential_writes tests in the performance test suite, when doing >=128K writes to files with recordsize=8K. This also removes some unnecessary casts that are in the area. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Nguyen Signed-off-by: Matthew Ahrens Closes #9081 --- module/zfs/dmu_tx.c | 6 ++++-- module/zfs/sa.c | 10 +++++----- module/zfs/zfs_vnops.c | 8 ++++++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 7d65e842ff0..d6a42f84c75 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -1338,7 +1338,10 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) object = sa_handle_object(hdl); - dmu_tx_hold_bonus(tx, object); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; + DB_DNODE_ENTER(db); + dmu_tx_hold_bonus_by_dnode(tx, DB_DNODE(db)); + DB_DNODE_EXIT(db); if (tx->tx_objset->os_sa->sa_master_obj == 0) return; @@ -1360,7 +1363,6 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) ASSERT(tx->tx_txg == 0); dmu_tx_hold_spill(tx, object); } else { - dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; dnode_t *dn; DB_DNODE_ENTER(db); diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 56a606962a7..4999fef345d 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1380,7 +1380,7 @@ sa_handle_destroy(sa_handle_t *hdl) dmu_buf_rele(hdl->sa_bonus, NULL); if (hdl->sa_spill) - dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + dmu_buf_rele(hdl->sa_spill, NULL); mutex_exit(&hdl->sa_lock); kmem_cache_free(sa_cache, hdl); @@ -2028,7 +2028,7 @@ sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, hdl->sa_spill_tab = NULL; } - dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + 
dmu_buf_rele(hdl->sa_spill, NULL); hdl->sa_spill = NULL; } @@ -2131,13 +2131,13 @@ sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) void sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) { - dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); + dmu_object_info_from_db(hdl->sa_bonus, doi); } void sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) { - dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, + dmu_object_size_from_db(hdl->sa_bonus, blksize, nblocks); } @@ -2150,7 +2150,7 @@ sa_set_userp(sa_handle_t *hdl, void *ptr) dmu_buf_t * sa_get_db(sa_handle_t *hdl) { - return ((dmu_buf_t *)hdl->sa_bonus); + return (hdl->sa_bonus); } void * diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 2a49293c245..7f33aea43d4 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -775,7 +775,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) */ dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); + DB_DNODE_ENTER(db); + dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, + MIN(n, max_blksz)); + DB_DNODE_EXIT(db); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { @@ -1048,7 +1052,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) return (SET_ERROR(ENOENT)); } - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; zgd->zgd_private = zp; From 6d1599c1e1d1fabb14eb27f8f28d3c6b539f3fdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Tue, 30 Jul 2019 18:59:38 +0200 Subject: [PATCH 123/325] Increase default zcmd allocation to 256K MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating hundreds of clones (for example using 
containers with LXD) cloning slows down as the number of clones increases over time. The reason for this is that the fetching of the clone information using a small zcmd buffer requires two ioctl calls, one to determine the size and a second to return the data. However, this requires gathering the data twice, once to determine the size and again to populate the zcmd buffer to return it to userspace. These are expensive ioctl() calls, so instead, make the default buffer size much larger: 256K. Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Colin Ian King Signed-off-by: Michael Niewöhner Closes #9084 --- lib/libzfs/libzfs_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index dc2d68ebebb..eed6282ca35 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1139,7 +1139,7 @@ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) - len = 16 * 1024; + len = 256 * 1024; zc->zc_nvlist_dst_size = len; zc->zc_nvlist_dst = (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); From 569f5d5d0543a1f1f4958a65fafc3eb7bf1778d1 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Tue, 13 Aug 2019 20:21:27 -0700 Subject: [PATCH 124/325] Fix out-of-order ZIL txtype lost on hardlinked files We should only call zil_remove_async when an object is removed. However, in current implementation, it is called whenever TX_REMOVE is called. In the case of hardlinked file, every unlink will generate TX_REMOVE and causing operations to be dropped even when the object is not removed. We fix this by only calling zil_remove_async when the file is fully unlinked. 
Reviewed-by: George Wilson Reviewed-by: Brian Behlendorf Reviewed-by: Prakash Surya Signed-off-by: Chunwei Chen Closes #8769 Closes #9061 --- include/sys/zfs_znode.h | 2 +- module/zfs/zfs_log.c | 15 ++++++++++++++- module/zfs/zfs_vnops.c | 5 +++-- module/zfs/zil.c | 12 +----------- .../tests/functional/slog/slog_replay_fs.ksh | 8 ++++++++ 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d4a3ea76933..add45a7f46e 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -371,7 +371,7 @@ extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, vattr_t *vap); extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid); + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked); #define ZFS_NO_OBJECT 0 /* no object id */ extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 15c396ce032..5966b7612b3 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -380,12 +380,14 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +void zil_remove_async(zilog_t *zilog, uint64_t oid); + /* * Handles both TX_REMOVE and TX_RMDIR transactions. */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid) + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked) { itx_t *itx; lr_remove_t *lr; @@ -401,6 +403,17 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, itx->itx_oid = foid; + /* + * Object ids can be re-instantiated in the next txg so + * remove any async transactions to avoid future leaks. 
+ * This can happen if a fsync occurs on the re-instantiated + * object for a WR_INDIRECT or WR_NEED_COPY write, which gets + * the new file data and flushes a write record for the old object. + */ + if (unlinked) { + ASSERT((txtype & ~TX_CI) == TX_REMOVE); + zil_remove_async(zilog, foid); + } zil_itx_assign(zilog, itx, tx); } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 7f33aea43d4..3c227816428 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1886,7 +1886,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) txtype = TX_REMOVE; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, obj); + zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); dmu_tx_commit(tx); out: @@ -2219,7 +2219,8 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, uint64_t txtype = TX_RMDIR; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); + zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT, + B_FALSE); } dmu_tx_commit(tx); diff --git a/module/zfs/zil.c b/module/zfs/zil.c index ff14a98b6b2..5249a0e9366 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1824,7 +1824,7 @@ zil_aitx_compare(const void *x1, const void *x2) /* * Remove all async itx with the given oid. */ -static void +void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; @@ -1876,16 +1876,6 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) itxg_t *itxg; itxs_t *itxs, *clean = NULL; - /* - * Object ids can be re-instantiated in the next txg so - * remove any async transactions to avoid future leaks. - * This can happen if a fsync occurs on the re-instantiated - * object for a WR_INDIRECT or WR_NEED_COPY write, which gets - * the new file data and flushes a write record for the old object. 
- */ - if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) - zil_remove_async(zilog, itx->itx_oid); - /* * Ensure the data of a renamed file is committed before the rename. */ diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh index 5f281a756f1..ea3f8451b9e 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh @@ -160,6 +160,14 @@ log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file +# TX_WRITE, TX_LINK, TX_REMOVE +# Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \ + count=8 +log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ + /$TESTPOOL/$TESTFS/link_and_unlink.link +log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link + # # 4. Copy TESTFS to temporary location (TESTDIR/copy) # From 65469f6e302205858b26da93c191ffab5bedbdff Mon Sep 17 00:00:00 2001 From: Dominic Pearson Date: Tue, 20 Aug 2019 00:22:52 +0200 Subject: [PATCH 125/325] Linux 5.3 compat: Makefile subdir-m no longer supported Uses obj-m instead, due to kernel changes. 
See LKML: Masahiro Yamada, Tue, 6 Aug 2019 19:03:23 +0900 Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Dominic Pearson Closes #9169 --- .gitignore | 11 +++++++++++ module/Makefile.in | 24 ++++++++++++------------ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 549fa59f382..ae9e22dfa7b 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,14 @@ cscope.* *.log venv +# +# Module leftovers +# +/module/avl/zavl.mod +/module/icp/icp.mod +/module/lua/zlua.mod +/module/nvpair/znvpair.mod +/module/spl/spl.mod +/module/unicode/zunicode.mod +/module/zcommon/zcommon.mod +/module/zfs/zfs.mod diff --git a/module/Makefile.in b/module/Makefile.in index eca7691aedb..7477dbe5650 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -1,11 +1,11 @@ -subdir-m += avl -subdir-m += icp -subdir-m += lua -subdir-m += nvpair -subdir-m += spl -subdir-m += unicode -subdir-m += zcommon -subdir-m += zfs +obj-m += avl/ +obj-m += icp/ +obj-m += lua/ +obj-m += nvpair/ +obj-m += spl/ +obj-m += unicode/ +obj-m += zcommon/ +obj-m += zfs/ INSTALL_MOD_DIR ?= extra @@ -60,13 +60,13 @@ modules_install: modules_uninstall: @# Uninstall the kernel modules kmoddir=$(DESTDIR)$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@ - list='$(subdir-m)'; for subdir in $$list; do \ - $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$subdir; \ done + list='$(obj-m)'; for objdir in $$list; do \ + $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \ done distdir: - list='$(subdir-m)'; for subdir in $$list; do \ - (cd @top_srcdir@/module && find $$subdir \ + list='$(obj-m)'; for objdir in $$list; do \ + (cd @top_srcdir@/module && find $$objdir \ -name '*.c' -o -name '*.h' -o -name '*.S' | \ xargs cp --parents -t @abs_top_builddir@/module/$$distdir); \ done From 023ab67a64fc297bb5d773406f5b1fc6dd0d957b Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Wed, 21 Aug 2019 09:29:23 -0700 Subject: [PATCH 126/325] Linux 5.3: Fix switch() fall through compiler errors Fix some
switch() fall-through compiler errors: abd.c:1504:9: error: this statement may fall through Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #9170 --- module/lua/llex.c | 9 ++++++--- module/zfs/abd.c | 4 ++++ module/zfs/vdev_raidz_math_scalar.c | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/module/lua/llex.c b/module/lua/llex.c index 8760155d054..50c301f599f 100644 --- a/module/lua/llex.c +++ b/module/lua/llex.c @@ -431,9 +431,12 @@ static int llex (LexState *ls, SemInfo *seminfo) { if (sep >= 0) { read_long_string(ls, seminfo, sep); return TK_STRING; - } - else if (sep == -1) return '['; - else lexerror(ls, "invalid long string delimiter", TK_STRING); + } else if (sep == -1) { + return '['; + } else { + lexerror(ls, "invalid long string delimiter", TK_STRING); + break; + } } case '=': { next(ls); diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 9041bd8b184..32b2c842c0d 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -1370,8 +1370,10 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, switch (parity) { case 3: len = MIN(caiters[2].iter_mapsize, len); + /* falls through */ case 2: len = MIN(caiters[1].iter_mapsize, len); + /* falls through */ case 1: len = MIN(caiters[0].iter_mapsize, len); } @@ -1461,9 +1463,11 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, case 3: len = MIN(xiters[2].iter_mapsize, len); len = MIN(citers[2].iter_mapsize, len); + /* falls through */ case 2: len = MIN(xiters[1].iter_mapsize, len); len = MIN(citers[1].iter_mapsize, len); + /* falls through */ case 1: len = MIN(xiters[0].iter_mapsize, len); len = MIN(citers[0].iter_mapsize, len); diff --git a/module/zfs/vdev_raidz_math_scalar.c b/module/zfs/vdev_raidz_math_scalar.c index a693bff63ff..cd742e146ca 100644 --- a/module/zfs/vdev_raidz_math_scalar.c +++ b/module/zfs/vdev_raidz_math_scalar.c @@ -142,6 +142,7 @@ static const struct { a.b[6] = mul_lt[a.b[6]]; \ a.b[5] = mul_lt[a.b[5]]; \ a.b[4] = mul_lt[a.b[4]]; \ + /* falls through */
\ case 4: \ a.b[3] = mul_lt[a.b[3]]; \ a.b[2] = mul_lt[a.b[2]]; \ From 512a50f38d17f77118af6f297ddf7ba720a48ebc Mon Sep 17 00:00:00 2001 From: yshui Date: Fri, 23 Aug 2019 01:11:17 +0100 Subject: [PATCH 127/325] zfs-mount-generator: dependencies should be space-separated Reviewed-by: Antonio Russo Reviewed-by: Richard Laager Signed-off-by: Yuxuan Shui Closes #9174 --- etc/systemd/system-generators/zfs-mount-generator.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index ae208c965f9..3e529cb67bb 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -215,7 +215,7 @@ EOF fi # Update the dependencies for the mount file to require the # key-loading unit. - wants="${wants},${keyloadunit}" + wants="${wants} ${keyloadunit}" fi # If the mountpoint has already been created, give it precedence. From 33374f21f0f8922baa95796c70edcc4bc17df19f Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 22 Aug 2019 20:26:51 -0400 Subject: [PATCH 128/325] Make slog test setup more robust The slog tests fail when attempting to create pools using file vdevs that already exist from previous test runs. Remove these files in the setup for the test.
Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Ryan Moeller Closes #9194 --- tests/zfs-tests/tests/functional/slog/setup.ksh | 9 --------- tests/zfs-tests/tests/functional/slog/slog.kshlib | 11 ++++++++++- .../zfs-tests/tests/functional/slog/slog_001_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_002_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_003_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_004_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_005_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_006_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_007_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_008_neg.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_009_neg.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_010_neg.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_011_neg.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_012_neg.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_013_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_014_pos.ksh | 1 + .../zfs-tests/tests/functional/slog/slog_015_neg.ksh | 1 + .../tests/functional/slog/slog_replay_fs.ksh | 1 + .../tests/functional/slog/slog_replay_volume.ksh | 1 + 19 files changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/zfs-tests/tests/functional/slog/setup.ksh b/tests/zfs-tests/tests/functional/slog/setup.ksh index f30824d3ee9..8e8d214d823 100755 --- a/tests/zfs-tests/tests/functional/slog/setup.ksh +++ b/tests/zfs-tests/tests/functional/slog/setup.ksh @@ -38,13 +38,4 @@ if ! 
verify_slog_support ; then log_unsupported "This system doesn't support separate intent logs" fi -if [[ -d $VDEV ]]; then - log_must rm -rf $VDIR -fi -if [[ -d $VDEV2 ]]; then - log_must rm -rf $VDIR2 -fi -log_must mkdir -p $VDIR $VDIR2 -log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 - log_pass diff --git a/tests/zfs-tests/tests/functional/slog/slog.kshlib b/tests/zfs-tests/tests/functional/slog/slog.kshlib index 6ed7e4e0502..75cfec2d832 100644 --- a/tests/zfs-tests/tests/functional/slog/slog.kshlib +++ b/tests/zfs-tests/tests/functional/slog/slog.kshlib @@ -31,11 +31,20 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/slog/slog.cfg +function setup +{ + log_must rm -rf $VDIR $VDIR2 + log_must mkdir -p $VDIR $VDIR2 + log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 + + return 0 +} + function cleanup { poolexists $TESTPOOL && destroy_pool $TESTPOOL poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 - rm -rf $TESTDIR + rm -rf $TESTDIR $VDIR $VDIR2 } # diff --git a/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh index 3d3daf5f9cc..a4c35ed9e98 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Creating a pool with a log device succeeds." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh index b056f19cdb8..91904aa612d 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding a log device to normal pool works." 
log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh index c647b8f54b7..0b4d6ede3e1 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding an extra log device works." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh index 4b0b3439a2e..10f28dcc000 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Attaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh index cbbb9486913..4836f6f2793 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Detaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh index 53e8c67ca00..24143196fd2 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Replacing a log device passes." 
log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh index 4926fb7b319..27ac38606c2 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh @@ -48,6 +48,7 @@ verify_runnable "global" log_assert "Exporting and importing pool with log devices passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh index 587e0e32122..54587a0c61a 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log is not supported." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh index e7091f17b75..222f71a9992 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log can not be added to existed pool." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh index 8fe248ffbcb..edd9abea093 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Slog device can not be replaced with spare device." 
log_onexit cleanup +log_must setup log_must zpool create $TESTPOOL $VDEV spare $SDEV log $LDEV sdev=$(random_get $SDEV) diff --git a/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh index 2dad200b31c..3bebc820171 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Offline and online a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh index 45566d427f1..8d6fb2bffb7 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Pool can survive when one of mirror log device get corrupted." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh index bbe5adc2417..d6917065ddb 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh @@ -60,6 +60,7 @@ log_assert "Verify slog device can be disk, file, lofi device or any device " \ "that presents a block interface." 
verify_disk_count "$DISKS" 2 log_onexit cleanup_testenv +log_must setup dsk1=${DISKS%% *} log_must zpool create $TESTPOOL ${DISKS#$dsk1} diff --git a/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh index 0ec96ae1e6f..e8ea29f1ffa 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "log device can survive when one of the pool device get corrupted." +log_must setup for type in "mirror" "raidz" "raidz2"; do for spare in "" "spare"; do diff --git a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh index 37821888ea0..fa610511657 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh @@ -47,6 +47,7 @@ function cleanup ORIG_TIMEOUT=$(get_tunable zfs_commit_timeout_pct | tail -1 | awk '{print $NF}') log_onexit cleanup +log_must setup for PCT in 0 1 2 4 8 16 32 64 128 256 512 1024; do log_must set_tunable64 zfs_commit_timeout_pct $PCT diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh index ea3f8451b9e..3e5bccd2ef1 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh @@ -66,6 +66,7 @@ function cleanup_fs log_assert "Replay of intent log succeeds." log_onexit cleanup_fs +log_must setup # # 1. 
Create an empty file system (TESTFS) diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh index c8a3cbbf43c..a72c83b5bfc 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh @@ -76,6 +76,7 @@ function cleanup_volume log_assert "Replay of intent log succeeds." log_onexit cleanup_volume +log_must setup # # 1. Create an empty volume (TESTVOL), set sync=always, and format From 95319fc569cf1ab322926f037b92dd4fd15b5630 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Tue, 27 Aug 2019 12:55:51 -0400 Subject: [PATCH 129/325] Fix deadlock in 'zfs rollback' Currently, the 'zfs rollback' code can end up deadlocked due to the way the kernel handles unreferenced inodes on a suspended fs. Essentially, the zfs_resume_fs() code path may cause zfs to spawn new threads as it reinstantiates the suspended fs's zil. When a new thread is spawned, the kernel may attempt to free memory for that thread by freeing some unreferenced inodes. If it happens to select inodes that are a part of the suspended fs a deadlock will occur because freeing inodes requires holding the fs's z_teardown_inactive_lock which is still held from the suspend. This patch corrects this issue by adding an additional reference to all inodes that are still present when a suspend is initiated. This prevents them from being freed by the kernel for any reason.
Reviewed-by: Alek Pinchuk Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #9203 --- include/sys/zfs_znode.h | 1 + module/zfs/zfs_vfsops.c | 16 +++++++++++++++- module/zfs/zfs_znode.c | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index add45a7f46e..01b358cc4da 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -196,6 +196,7 @@ typedef struct znode { uint8_t z_atime_dirty; /* atime needs to be synced */ uint8_t z_zn_prefetch; /* Prefetch znodes? */ uint8_t z_moved; /* Has this znode been moved? */ + boolean_t z_suspended; /* extra ref from a suspend? */ uint_t z_blksz; /* block size in bytes */ uint_t z_seq; /* modification sequence number */ uint64_t z_mapcnt; /* number of pages mapped to file */ diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 371c412f6be..489f12b7fc0 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1736,7 +1736,12 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * - * Release all holds on dbufs. + * Release all holds on dbufs. We also grab an extra reference to all + * the remaining inodes so that the kernel does not attempt to free + * any inodes of a suspended fs. This can cause deadlocks since the + * zfs_resume_fs() process may involve starting threads, which might + * attempt to free unreferenced inodes to free up memory for the new + * thread. 
*/ if (!unmounting) { mutex_enter(&zfsvfs->z_znodes_lock); @@ -1744,6 +1749,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) zp = list_next(&zfsvfs->z_all_znodes, zp)) { if (zp->z_sa_hdl) zfs_znode_dmu_fini(zp); + if (igrab(ZTOI(zp)) != NULL) + zp->z_suspended = B_TRUE; + } mutex_exit(&zfsvfs->z_znodes_lock); } @@ -2192,6 +2200,12 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } + + /* see comment in zfs_suspend_fs() */ + if (zp->z_suspended) { + zfs_iput_async(ZTOI(zp)); + zp->z_suspended = B_FALSE; + } } mutex_exit(&zfsvfs->z_znodes_lock); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 3dd29994220..91162e857d4 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -540,6 +540,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_moved = 0; + zp->z_suspended = B_FALSE; zp->z_sa_hdl = NULL; zp->z_unlinked = 0; zp->z_atime_dirty = 0; From ea34735203a259f331dc549c25c7ed92b34cd470 Mon Sep 17 00:00:00 2001 From: Richard Allen <33836503+belperite@users.noreply.github.com> Date: Tue, 27 Aug 2019 21:44:02 +0100 Subject: [PATCH 130/325] Fix Plymouth passphrase prompt in initramfs script Entering the ZFS encryption passphrase under Plymouth wasn't working because in the ZFS initrd script, Plymouth was calling zfs via "--command", which wasn't passing through the filesystem argument to zfs load-key properly (it was passing through the single quotes around the filesystem name intended to handle spaces literally, which zfs load-key couldn't understand). 
Reviewed-by: Richard Laager Reviewed-by: Garrett Fields Signed-off-by: Richard Allen Issue #9193 Closes #9202 --- contrib/initramfs/scripts/zfs.in | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index ad604a82ce5..05410ea2bdc 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -411,29 +411,29 @@ decrypt_fs() # Determine dataset that holds key for root dataset ENCRYPTIONROOT=$(${ZFS} get -H -o value encryptionroot "${fs}") - DECRYPT_CMD="${ZFS} load-key '${ENCRYPTIONROOT}'" # If root dataset is encrypted... if ! [ "${ENCRYPTIONROOT}" = "-" ]; then - + TRY_COUNT=3 # Prompt with plymouth, if active if [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then - plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" \ - --number-of-tries="3" \ - --command="${DECRYPT_CMD}" + while [ $TRY_COUNT -gt 0 ]; do + plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \ + $ZFS load-key "${ENCRYPTIONROOT}" && break + TRY_COUNT=$((TRY_COUNT - 1)) + done # Prompt with systemd, if active elif [ -e /run/systemd/system ]; then - TRY_COUNT=3 while [ $TRY_COUNT -gt 0 ]; do systemd-ask-password "Encrypted ZFS password for ${ENCRYPTIONROOT}" --no-tty | \ - ${DECRYPT_CMD} && break + $ZFS load-key "${ENCRYPTIONROOT}" && break TRY_COUNT=$((TRY_COUNT - 1)) done # Prompt with ZFS tty, otherwise else - eval "${DECRYPT_CMD}" + $ZFS load-key "${ENCRYPTIONROOT}" fi fi fi From 931bef81c8a4bda13e22be770c1dca3721dffc0f Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 27 Aug 2019 23:45:53 +0300 Subject: [PATCH 131/325] zfs_ioc_snapshot: check user-prop permissions on snapshotted datasets Previously, the permissions were checked on the pool which was obviously incorrect. After this change, zfs_check_userprops() only validates the properties without any permission checks. 
The permissions are checked individually for each snapshotted dataset. Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Signed-off-by: Andriy Gapon Closes #9179 Closes #9180 --- module/zfs/zfs_ioctl.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 152433d6079..ac573ccbf17 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2744,10 +2744,9 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(const char *fsname, nvlist_t *nvl) +zfs_check_userprops(nvlist_t *nvl) { nvpair_t *pair = NULL; - int error = 0; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); @@ -2756,10 +2755,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl) nvpair_type(pair) != DATA_TYPE_STRING) return (SET_ERROR(EINVAL)); - if ((error = zfs_secpolicy_write_perms(fsname, - ZFS_DELEG_PERM_USERPROP, CRED()))) - return (error); - if (strlen(propname) >= ZAP_MAXNAMELEN) return (SET_ERROR(ENAMETOOLONG)); @@ -3473,19 +3468,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *pair; (void) nvlist_lookup_nvlist(innvl, "props", &props); - if ((error = zfs_check_userprops(poolname, props)) != 0) - return (error); - if (!nvlist_empty(props) && zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) return (SET_ERROR(ENOTSUP)); + if ((error = zfs_check_userprops(props)) != 0) + return (error); snaps = fnvlist_lookup_nvlist(innvl, "snaps"); poollen = strlen(poolname); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); - const char *cp = strchr(name, '@'); + char *cp = strchr(name, '@'); /* * The snap name must contain an @, and the part after it must @@ -3502,6 +3496,18 @@ zfs_ioc_snapshot(const 
char *poolname, nvlist_t *innvl, nvlist_t *outnvl) (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); + /* + * Check for permission to set the properties on the fs. + */ + if (!nvlist_empty(props)) { + *cp = '\0'; + error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_USERPROP, CRED()); + *cp = '@'; + if (error != 0) + return (error); + } + /* This must be the only snap of this fs. */ for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { From c7a4255f128cc493df8383cb9f1ed650191b2081 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Wed, 28 Aug 2019 10:42:02 -0700 Subject: [PATCH 132/325] Fix zil replay panic when TX_REMOVE followed by TX_CREATE If TX_REMOVE is followed by TX_CREATE on the same object id, we need to make sure the object removal is completely finished before creation. The current implementation relies on dnode_hold_impl with DNODE_MUST_BE_ALLOCATED returning ENOENT. While this check seems to work fine before, in current version it does not guarantee the object removal is completed. We fix this by checking if DNODE_MUST_BE_FREE returns successful instead. Also add test and remove dead code in dnode_hold_impl. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Signed-off-by: Chunwei Chen Closes #7151 Closes #8910 Closes #9123 Closes #9145 --- include/sys/dnode.h | 7 +- module/zfs/dnode.c | 49 +++++-- module/zfs/zfs_replay.c | 8 +- tests/runfiles/linux.run | 4 +- .../tests/functional/slog/Makefile.am | 3 +- ...g_replay_fs.ksh => slog_replay_fs_001.ksh} | 0 .../functional/slog/slog_replay_fs_002.ksh | 137 ++++++++++++++++++ 7 files changed, 184 insertions(+), 24 deletions(-) rename tests/zfs-tests/tests/functional/slog/{slog_replay_fs.ksh => slog_replay_fs_001.ksh} (100%) create mode 100755 tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh diff --git a/include/sys/dnode.h b/include/sys/dnode.h index c60258bbc76..e97e40373b4 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -46,6 +46,7 @@ extern "C" { */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 +#define DNODE_DRY_RUN 4 /* * dnode_next_offset() flags. @@ -415,6 +416,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots, boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref); void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); +int dnode_try_claim(objset_t *os, uint64_t object, int slots); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, @@ -532,11 +534,6 @@ typedef struct dnode_stats { * a range of dnode slots which would overflow the dnode_phys_t. */ kstat_named_t dnode_hold_free_overflow; - /* - * Number of times a dnode_hold(...) was attempted on a dnode - * which had already been unlinked in an earlier txg. - */ - kstat_named_t dnode_hold_free_txg; /* * Number of times dnode_free_interior_slots() needed to retry * acquiring a slot zrl lock due to contention. 
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 5fd473303d7..cc7bc5ec82c 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -55,7 +55,6 @@ dnode_stats_t dnode_stats = { { "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 }, { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 }, - { "dnode_hold_free_txg", KSTAT_DATA_UINT64 }, { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_allocate", KSTAT_DATA_UINT64 }, { "dnode_reallocate", KSTAT_DATA_UINT64 }, @@ -1255,6 +1254,10 @@ dnode_buf_evict_async(void *dbu) * as an extra dnode slot by an large dnode, in which case it returns * ENOENT. * + * If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just + * return whether the hold would succeed or not. tag and dnp should set to + * NULL in this case. + * * errors: * EINVAL - Invalid object number or flags. * ENOSPC - Hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE) @@ -1283,6 +1286,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0)); ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0)); + IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL)); /* * If you are holding the spa config lock as writer, you shouldn't @@ -1312,8 +1316,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) return (SET_ERROR(EEXIST)); DNODE_VERIFY(dn); - (void) zfs_refcount_add(&dn->dn_holds, tag); - *dnp = dn; + /* Don't actually hold if dry run, just return 0 */ + if (!(flag & DNODE_DRY_RUN)) { + (void) zfs_refcount_add(&dn->dn_holds, tag); + *dnp = dn; + } return (0); } @@ -1455,6 +1462,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(ENOENT)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + 
dbuf_rele(db, FTAG); + return (0); + } + DNODE_STAT_BUMP(dnode_hold_alloc_hits); } else if (flag & DNODE_MUST_BE_FREE) { @@ -1512,6 +1527,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EEXIST)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR); DNODE_STAT_BUMP(dnode_hold_free_hits); } else { @@ -1519,15 +1542,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EINVAL)); } - if (dn->dn_free_txg) { - DNODE_STAT_BUMP(dnode_hold_free_txg); - type = dn->dn_type; - mutex_exit(&dn->dn_mtx); - dnode_slots_rele(dnc, idx, slots); - dbuf_rele(db, FTAG); - return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ? - ENOENT : EEXIST)); - } + ASSERT0(dn->dn_free_txg); if (zfs_refcount_add(&dn->dn_holds, tag) == 1) dbuf_add_ref(db, dnh); @@ -1618,6 +1633,16 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) } } +/* + * Test whether we can create a dnode at the specified location. 
+ */ +int +dnode_try_claim(objset_t *os, uint64_t object, int slots) +{ + return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN, + slots, NULL, NULL)); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 14438176905..7dea85bb661 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -337,8 +337,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto bail; if (lr->lr_common.lrc_txtype & TX_CI) @@ -473,8 +473,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, objid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto out; if (lr->lr_common.lrc_txtype & TX_CI) diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 1c368d20c45..0e157cf0e98 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -824,8 +824,8 @@ tags = ['functional', 'scrub_mirror'] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs', - 'slog_replay_volume'] + 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001', + 'slog_replay_fs_002', 'slog_replay_volume'] tags = ['functional', 'slog'] [tests/functional/snapshot] diff --git a/tests/zfs-tests/tests/functional/slog/Makefile.am b/tests/zfs-tests/tests/functional/slog/Makefile.am index 4548ce63b40..33e3a6d3a49 100644 --- 
a/tests/zfs-tests/tests/functional/slog/Makefile.am +++ b/tests/zfs-tests/tests/functional/slog/Makefile.am @@ -17,7 +17,8 @@ dist_pkgdata_SCRIPTS = \ slog_013_pos.ksh \ slog_014_pos.ksh \ slog_015_neg.ksh \ - slog_replay_fs.ksh \ + slog_replay_fs_001.ksh \ + slog_replay_fs_002.ksh \ slog_replay_volume.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh rename to tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh new file mode 100755 index 00000000000..3c3ccdf4ad2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh @@ -0,0 +1,137 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/tests/functional/slog/slog.kshlib + +# +# DESCRIPTION: +# Verify slog replay correctly when TX_REMOVEs are followed by +# TX_CREATEs. +# +# STRATEGY: +# 1. Create a file system (TESTFS) with a lot of files +# 2. Freeze TESTFS +# 3. Remove all files then create a lot of files +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# 5. Unmount filesystem +# +# 6. Remount TESTFS +# 7. Compare TESTFS against the TESTDIR/copy +# + +verify_runnable "global" + +function cleanup_fs +{ + cleanup +} + +log_assert "Replay of intent log succeeds." +log_onexit cleanup_fs +log_must setup + +# +# 1. Create a file system (TESTFS) with a lot of files +# +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create $TESTPOOL/$TESTFS + +# Prep for the test of TX_REMOVE followed by TX_CREATE +dnsize=(legacy auto 1k 2k 4k 8k 16k) +NFILES=200 +log_must mkdir /$TESTPOOL/$TESTFS/dir0 +log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done' + +# +# Reimport to reset dnode allocation pointer. +# This is to make sure we will have TX_REMOVE and TX_CREATE on same id +# +log_must zpool export $TESTPOOL +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# 2. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 3. Remove all files then create a lot of files +# +# TX_REMOVE followed by TX_CREATE +log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*' +log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done' + +# +# 4. 
Copy TESTFS to temporary location (TESTDIR/copy) +# +log_must mkdir -p $TESTDIR/copy +log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/ + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is empty again and frozen, the intent log contains +# a complete set of deltas to replay. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. Compare TESTFS against the TESTDIR/copy +# +log_note "Verify current block usage:" +log_must zdb -bcv $TESTPOOL + +log_note "Verify number of files" +log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq $NFILES + +log_note "Verify working set diff:" +log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy + +log_pass "Replay of intent log succeeds." From 0e765c4eb89346a77733037a46b32aec85205a19 Mon Sep 17 00:00:00 2001 From: Pavel Zakharov Date: Wed, 28 Aug 2019 18:02:58 -0400 Subject: [PATCH 133/325] zfs_handle used after being closed/freed in change_one callback This is a typical case of use after free. We would call zfs_close(zhp) which would free the handle, and then call zfs_iter_children() on that handle later. This change ensures that the zfs_handle is only closed when we are ready to return. Running `zfs inherit -r sharenfs pool` was failing with an error code without any error messages. After some debugging I've pinpointed the issue to be memory corruption, which would cause zfs to try to issue an ioctl to the wrong device and receive ENOTTY. 
Reviewed-by: Paul Dagnelie Reviewed-by: George Wilson Reviewed-by: Sebastien Roy Reviewed-by: Brian Behlendorf Reviewed-by: Alek Pinchuk Signed-off-by: Pavel Zakharov Issue #7967 Closes #9165 --- lib/libzfs/libzfs_changelist.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 3101febc160..72f641056ed 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -475,9 +475,10 @@ change_one(zfs_handle_t *zhp, void *data) prop_changelist_t *clp = data; char property[ZFS_MAXPROPLEN]; char where[64]; - prop_changenode_t *cn; + prop_changenode_t *cn = NULL; zprop_source_t sourcetype = ZPROP_SRC_NONE; zprop_source_t share_sourcetype = ZPROP_SRC_NONE; + int ret = 0; /* * We only want to unmount/unshare those filesystems that may inherit @@ -493,8 +494,7 @@ change_one(zfs_handle_t *zhp, void *data) zfs_prop_get(zhp, clp->cl_prop, property, sizeof (property), &sourcetype, where, sizeof (where), B_FALSE) != 0) { - zfs_close(zhp); - return (0); + goto out; } /* @@ -506,8 +506,7 @@ change_one(zfs_handle_t *zhp, void *data) zfs_prop_get(zhp, clp->cl_shareprop, property, sizeof (property), &share_sourcetype, where, sizeof (where), B_FALSE) != 0) { - zfs_close(zhp); - return (0); + goto out; } if (clp->cl_alldependents || clp->cl_allchildren || @@ -518,8 +517,8 @@ change_one(zfs_handle_t *zhp, void *data) share_sourcetype == ZPROP_SRC_INHERITED))) { if ((cn = zfs_alloc(zfs_get_handle(zhp), sizeof (prop_changenode_t))) == NULL) { - zfs_close(zhp); - return (-1); + ret = -1; + goto out; } cn->cn_handle = zhp; @@ -541,16 +540,23 @@ change_one(zfs_handle_t *zhp, void *data) uu_avl_insert(clp->cl_tree, cn, idx); } else { free(cn); - zfs_close(zhp); + cn = NULL; } if (!clp->cl_alldependents) - return (zfs_iter_children(zhp, change_one, data)); - } else { - zfs_close(zhp); + ret = zfs_iter_children(zhp, change_one, data); + + /* + * If we added the 
handle to the changelist, we will re-use it + * later so return without closing it. + */ + if (cn != NULL) + return (ret); } - return (0); +out: + zfs_close(zhp); + return (ret); } static int From 3cf4ecb03fecca9d9a326c32e8f1f7573a93a8e3 Mon Sep 17 00:00:00 2001 From: Georgy Yakovlev <168902+gyakovlev@users.noreply.github.com> Date: Thu, 29 Aug 2019 12:14:48 -0800 Subject: [PATCH 134/325] etc/init.d/zfs-functions.in: remove arch warning Remove the x86_64 warning, it's no longer the case that this is the only supported architecture. Reviewed-by: Brian Behlendorf Signed-off-by: Georgy Yakovlev Closes: #9177 --- etc/init.d/zfs-functions.in | 7 ------- 1 file changed, 7 deletions(-) diff --git a/etc/init.d/zfs-functions.in b/etc/init.d/zfs-functions.in index 490503e9139..cbc7fd22a0a 100644 --- a/etc/init.d/zfs-functions.in +++ b/etc/init.d/zfs-functions.in @@ -294,13 +294,6 @@ checksystem() # Just make sure that /dev/zfs is created. udev_trigger - if ! [ "$(uname -m)" = "x86_64" ]; then - echo "Warning: You're not running 64bit. Currently native zfs in"; - echo " Linux is only supported and tested on 64bit."; - # should we break here? People doing this should know what they - # do, thus i'm not breaking here. - fi - return 0 } From 13e5e396a31df268cba6571a800abe9e54c47db4 Mon Sep 17 00:00:00 2001 From: loli10K Date: Tue, 3 Sep 2019 19:36:33 +0200 Subject: [PATCH 135/325] Fix Intel QAT / ZFS compatibility on v4.7.1+ kernels This change use the compat code introduced in 9cc1844a. 
Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #9268 Closes #9269 --- module/zfs/qat_compress.c | 2 +- module/zfs/qat_crypt.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index 1c5c0a4e725..b3c8c162167 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -547,7 +547,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, } static int -param_set_qat_compress(const char *val, struct kernel_param *kp) +param_set_qat_compress(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c index 34c19b5823a..2170366df14 100644 --- a/module/zfs/qat_crypt.c +++ b/module/zfs/qat_crypt.c @@ -578,7 +578,7 @@ qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) } static int -param_set_qat_encrypt(const char *val, struct kernel_param *kp) +param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; @@ -600,7 +600,7 @@ param_set_qat_encrypt(const char *val, struct kernel_param *kp) } static int -param_set_qat_checksum(const char *val, struct kernel_param *kp) +param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; From beb21db3c6ac503a43ef7c6532d099c056f89f5b Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 3 Sep 2019 20:56:55 +0300 Subject: [PATCH 136/325] Always refuse receving non-resume stream when resume state exists This fixes a hole in the situation where the resume state is left from receiving a new dataset and, so, the state is set on the dataset itself (as opposed to %recv child). Additionally, distinguish incremental and resume streams in error messages. 
Reviewed-by: Matt Ahrens Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Andriy Gapon Closes #9252 --- lib/libzfs/libzfs_sendrecv.c | 15 +++++++++++---- module/zfs/dmu_recv.c | 10 +++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 0d3853e0a1c..d967e043b4e 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -3992,11 +3992,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } } else { /* - * if the fs does not exist, look for it based on the - * fromsnap GUID + * If the fs does not exist, look for it based on the + * fromsnap GUID. */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive incremental stream")); + if (resuming) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot receive resume stream")); + } else { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot receive incremental stream")); + } (void) strcpy(name, destsnap); *strchr(name, '@') = '\0'; diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 3481feb21db..2324e8e87ba 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -86,21 +86,25 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0; boolean_t embed = (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) != 0; - /* temporary clone name must not exist */ + /* Temporary clone name must not exist. */ error = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EBUSY : error); - /* new snapshot name must not exist */ + /* Resume state must not be set. */ + if (dsl_dataset_has_resume_receive_state(ds)) + return (SET_ERROR(EBUSY)); + + /* New snapshot name must not exist. 
*/ error = zap_lookup(dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EEXIST : error); - /* must not have children if receiving a ZVOL */ + /* Must not have children if receiving a ZVOL. */ error = zap_count(dp->dp_meta_objset, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &children); if (error != 0) From 38528476bf0b64e7462a1141ff73d016a94f3471 Mon Sep 17 00:00:00 2001 From: Pavel Zakharov Date: Wed, 17 Jul 2019 18:33:05 -0400 Subject: [PATCH 137/325] New service that waits on zvol links to be created MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The zfs-volume-wait.service scans existing zvols and waits for their links under /dev to be created. Any service that depends on zvol links to be there should add a dependency on zfs-volumes.target. By default, this target is not enabled. Reviewed-by: Fabian Grünbichler Reviewed-by: Antonio Russo Reviewed-by: Richard Laager Reviewed-by: loli10K Reviewed-by: John Gallagher Reviewed-by: George Wilson Reviewed-by: Brian Behlendorf Signed-off-by: Pavel Zakharov Closes #8975 --- cmd/Makefile.am | 2 +- cmd/zvol_wait/Makefile.am | 1 + cmd/zvol_wait/zvol_wait | 93 +++++++++++++++++++ configure.ac | 1 + etc/systemd/system/50-zfs.preset.in | 1 + etc/systemd/system/Makefile.am | 4 + etc/systemd/system/zfs-volume-wait.service.in | 13 +++ etc/systemd/system/zfs-volumes.target.in | 7 ++ man/man1/Makefile.am | 2 +- man/man1/zvol_wait.1 | 21 +++++ rpm/generic/zfs.spec.in | 3 +- 11 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 cmd/zvol_wait/Makefile.am create mode 100755 cmd/zvol_wait/zvol_wait create mode 100644 etc/systemd/system/zfs-volume-wait.service.in create mode 100644 etc/systemd/system/zfs-volumes.target.in create mode 100644 man/man1/zvol_wait.1 diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 0d990789b0c..88609e455f2 100644 --- a/cmd/Makefile.am +++ 
b/cmd/Makefile.am @@ -5,4 +5,4 @@ if USING_PYTHON SUBDIRS += arcstat arc_summary dbufstat endif -SUBDIRS += mount_zfs zed zvol_id +SUBDIRS += mount_zfs zed zvol_id zvol_wait diff --git a/cmd/zvol_wait/Makefile.am b/cmd/zvol_wait/Makefile.am new file mode 100644 index 00000000000..564031c9799 --- /dev/null +++ b/cmd/zvol_wait/Makefile.am @@ -0,0 +1 @@ +dist_bin_SCRIPTS = zvol_wait diff --git a/cmd/zvol_wait/zvol_wait b/cmd/zvol_wait/zvol_wait new file mode 100755 index 00000000000..d512be41bcb --- /dev/null +++ b/cmd/zvol_wait/zvol_wait @@ -0,0 +1,93 @@ +#!/bin/sh + +count_zvols() { + if [ -z "$zvols" ]; then + echo 0 + else + echo "$zvols" | wc -l + fi +} + +filter_out_zvols_with_links() { + while read -r zvol; do + if [ ! -L "/dev/zvol/$zvol" ]; then + echo "$zvol" + fi + done +} + +filter_out_deleted_zvols() { + while read -r zvol; do + if zfs list "$zvol" >/dev/null 2>&1; then + echo "$zvol" + fi + done +} + +list_zvols() { + zfs list -t volume -H -o name,volmode | while read -r zvol_line; do + name=$(echo "$zvol_line" | awk '{print $1}') + volmode=$(echo "$zvol_line" | awk '{print $2}') + # /dev links are not created for zvols with volmode = "none". + [ "$volmode" = "none" ] || echo "$name" + done +} + +zvols=$(list_zvols) +zvols_count=$(count_zvols) +if [ "$zvols_count" -eq 0 ]; then + echo "No zvols found, nothing to do." + exit 0 +fi + +echo "Testing $zvols_count zvol links" + +outer_loop=0 +while [ "$outer_loop" -lt 20 ]; do + outer_loop=$((outer_loop + 1)) + + old_zvols_count=$(count_zvols) + + inner_loop=0 + while [ "$inner_loop" -lt 30 ]; do + inner_loop=$((inner_loop + 1)) + + zvols="$(echo "$zvols" | filter_out_zvols_with_links)" + + zvols_count=$(count_zvols) + if [ "$zvols_count" -eq 0 ]; then + echo "All zvol links are now present." + exit 0 + fi + sleep 1 + done + + echo "Still waiting on $zvols_count zvol links ..." 
+ # + # Although zvols should normally not be deleted at boot time, + # if that is the case then their links will be missing and + # we would stall. + # + if [ "$old_zvols_count" -eq "$zvols_count" ]; then + echo "No progress since last loop." + echo "Checking if any zvols were deleted." + + zvols=$(echo "$zvols" | filter_out_deleted_zvols) + zvols_count=$(count_zvols) + + if [ "$old_zvols_count" -ne "$zvols_count" ]; then + echo "$((old_zvols_count - zvols_count)) zvol(s) deleted." + fi + + if [ "$zvols_count" -ne 0 ]; then + echo "Remaining zvols:" + echo "$zvols" + else + echo "All zvol links are now present." + exit 0 + fi + fi +done + +echo "Timed out waiting on zvol links" +exit 1 diff --git a/configure.ac b/configure.ac index e8592ffb1d2..a3ac134ffcc 100644 --- a/configure.ac +++ b/configure.ac @@ -123,6 +123,7 @@ AC_CONFIG_FILES([ cmd/zed/zed.d/Makefile cmd/raidz_test/Makefile cmd/zgenhostid/Makefile + cmd/zvol_wait/Makefile contrib/Makefile contrib/bash_completion.d/Makefile contrib/dracut/Makefile diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in index 884a69b5b68..e4056a92cd9 100644 --- a/etc/systemd/system/50-zfs.preset.in +++ b/etc/systemd/system/50-zfs.preset.in @@ -5,4 +5,5 @@ enable zfs-import.target enable zfs-mount.service enable zfs-share.service enable zfs-zed.service +enable zfs-volume-wait.service enable zfs.target diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index 1586209caa6..9249f15eb45 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -7,7 +7,9 @@ systemdunit_DATA = \ zfs-import-scan.service \ zfs-mount.service \ zfs-share.service \ + zfs-volume-wait.service \ zfs-import.target \ + zfs-volumes.target \ zfs.target EXTRA_DIST = \ @@ -17,6 +19,8 @@ EXTRA_DIST = \ $(top_srcdir)/etc/systemd/system/zfs-mount.service.in \ $(top_srcdir)/etc/systemd/system/zfs-share.service.in \ $(top_srcdir)/etc/systemd/system/zfs-import.target.in \ + 
$(top_srcdir)/etc/systemd/system/zfs-volume-wait.service.in \ + $(top_srcdir)/etc/systemd/system/zfs-volumes.target.in \ $(top_srcdir)/etc/systemd/system/zfs.target.in \ $(top_srcdir)/etc/systemd/system/50-zfs.preset.in diff --git a/etc/systemd/system/zfs-volume-wait.service.in b/etc/systemd/system/zfs-volume-wait.service.in new file mode 100644 index 00000000000..75bd9fcdd56 --- /dev/null +++ b/etc/systemd/system/zfs-volume-wait.service.in @@ -0,0 +1,13 @@ +[Unit] +Description=Wait for ZFS Volume (zvol) links in /dev +DefaultDependencies=no +After=systemd-udev-settle.service +After=zfs-import.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=@bindir@/zvol_wait + +[Install] +WantedBy=zfs-volumes.target diff --git a/etc/systemd/system/zfs-volumes.target.in b/etc/systemd/system/zfs-volumes.target.in new file mode 100644 index 00000000000..5cb9a10f49c --- /dev/null +++ b/etc/systemd/system/zfs-volumes.target.in @@ -0,0 +1,7 @@ +[Unit] +Description=ZFS volumes are ready +After=zfs-volume-wait.service +Requires=zfs-volume-wait.service + +[Install] +WantedBy=zfs.target diff --git a/man/man1/Makefile.am b/man/man1/Makefile.am index bd78be1452a..2af917fa5c2 100644 --- a/man/man1/Makefile.am +++ b/man/man1/Makefile.am @@ -1,4 +1,4 @@ -dist_man_MANS = zhack.1 ztest.1 raidz_test.1 +dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1 EXTRA_DIST = cstyle.1 install-data-local: diff --git a/man/man1/zvol_wait.1 b/man/man1/zvol_wait.1 new file mode 100644 index 00000000000..0366da5376d --- /dev/null +++ b/man/man1/zvol_wait.1 @@ -0,0 +1,21 @@ +.Dd July 5, 2019 +.Dt ZVOL_WAIT 1 SMM +.Os Linux +.Sh NAME +.Nm zvol_wait +.Nd Wait for ZFS volume links in +.Em /dev +to be created. +.Sh SYNOPSIS +.Nm +.Sh DESCRIPTION +When a ZFS pool is imported, ZFS will register each ZFS volume +(zvol) as a disk device with the system. As the disks are registered, +.Xr \fBudev 7\fR +will asynchronously create symlinks under +.Em /dev/zvol +using the zvol's name. 
+.Nm +will wait for all those symlinks to be created before returning. +.Sh SEE ALSO +.Xr \fBudev 7\fR diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 0864a72a115..4fdf7bb69ec 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -322,7 +322,7 @@ image which is ZFS aware. %if 0%{?_systemd} %define systemd --enable-systemd --with-systemdunitdir=%{_unitdir} --with-systemdpresetdir=%{_presetdir} --with-systemdmodulesloaddir=%{_modulesloaddir} --with-systemdgeneratordir=%{_systemdgeneratordir} --disable-sysvinit - %define systemd_svcs zfs-import-cache.service zfs-import-scan.service zfs-mount.service zfs-share.service zfs-zed.service zfs.target zfs-import.target + %define systemd_svcs zfs-import-cache.service zfs-import-scan.service zfs-mount.service zfs-share.service zfs-zed.service zfs.target zfs-import.target zfs-volume-wait.service zfs-volumes.target %else %define systemd --enable-sysvinit --disable-systemd %endif @@ -419,6 +419,7 @@ systemctl --system daemon-reload >/dev/null || true %{_sbindir}/* %{_bindir}/raidz_test %{_bindir}/zgenhostid +%{_bindir}/zvol_wait # Optional Python 2/3 scripts %{_bindir}/arc_summary %{_bindir}/arcstat From 5acba22ec0bd934894d746ca967d451fdc6d3368 Mon Sep 17 00:00:00 2001 From: Pavel Zakharov Date: Tue, 3 Sep 2019 14:29:52 -0400 Subject: [PATCH 138/325] zvol_wait script should ignore partially received zvols Partially received zvols won't have links in /dev/zvol. 
Reviewed-by: Sebastien Roy Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Pavel Zakharov Closes #9260 --- cmd/zvol_wait/zvol_wait | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/cmd/zvol_wait/zvol_wait b/cmd/zvol_wait/zvol_wait index d512be41bcb..e5df82dd376 100755 --- a/cmd/zvol_wait/zvol_wait +++ b/cmd/zvol_wait/zvol_wait @@ -25,11 +25,30 @@ filter_out_deleted_zvols() { } list_zvols() { - zfs list -t volume -H -o name,volmode | while read -r zvol_line; do + zfs list -t volume -H -o name,volmode,receive_resume_token | + while read -r zvol_line; do name=$(echo "$zvol_line" | awk '{print $1}') volmode=$(echo "$zvol_line" | awk '{print $2}') + token=$(echo "$zvol_line" | awk '{print $3}') + # # /dev links are not created for zvols with volmode = "none". - [ "$volmode" = "none" ] || echo "$name" + # + [ "$volmode" = "none" ] && continue + # + # We also also ignore partially received zvols if it is + # not an incremental receive, as those won't even have a block + # device minor node created yet. + # + if [ "$token" != "-" ]; then + # + # Incremental receives create an invisible clone that + # is not automatically displayed by zfs list. + # + if ! zfs list "$name/%recv" >/dev/null 2>&1; then + continue + fi + fi + echo "$name" done } From 9f261b1be681e93158d65fa8e5f2a0553af05b20 Mon Sep 17 00:00:00 2001 From: loli10K Date: Wed, 4 Sep 2019 00:20:39 +0200 Subject: [PATCH 139/325] Fix zfs-dkms .deb package warning in prerm script Debian zfs-dkms package generated by alien doesn't call the prerm script (rpm's %preun) with an integer as first parameter, which results in the following warning when the package is uninstalled: "zfs-dkms.prerm: line 3: [: remove: integer expression expected" Modify the if-condition to avoid the warning. 
Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #9271 --- rpm/generic/zfs-dkms.spec.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in index 568bef988ca..d8729368642 100644 --- a/rpm/generic/zfs-dkms.spec.in +++ b/rpm/generic/zfs-dkms.spec.in @@ -73,7 +73,7 @@ exit 1 %preun # Are we doing an upgrade? -if [ $1 -ne 0 ] ; then +if [ "$1" = "1" -o "$1" = "upgrade" ] ; then # Yes we are. Are we upgrading to a new ZFS version? NEWEST_VER=$(dkms status zfs | sed 's/,//g' | sort -r -V | awk '/installed/{print $2; exit}') if [ "$NEWEST_VER" != "%{version}" ] ; then From 146d7d8846d532a0ee66454ec0b14d6a511a6228 Mon Sep 17 00:00:00 2001 From: loli10K Date: Wed, 4 Sep 2019 22:36:25 +0200 Subject: [PATCH 140/325] Fix zpool subcommands error message with some unsupported options Both 'detach' and 'online' zpool subcommands, when provided with an unsupported option, forget to print it in the error message: # zpool online -t rpool vda3 invalid option '' usage: online [-e] ... This changes fixes the error message in order to include the actual option that is not supported. 
Reviewed-by: Ryan Moeller Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #9270 --- cmd/zpool/zpool_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index a3c76030d63..b9c7462b618 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -6111,9 +6111,8 @@ zpool_do_detach(int argc, char **argv) int ret; /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { + while ((c = getopt(argc, argv, "")) != -1) { switch (c) { - case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -6342,12 +6341,11 @@ zpool_do_online(int argc, char **argv) int flags = 0; /* check options */ - while ((c = getopt(argc, argv, "et")) != -1) { + while ((c = getopt(argc, argv, "e")) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; - case 't': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); From 0ae5f0c8d29f2dff2470779cd7e1b4c3cfeaf12b Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Fri, 6 Sep 2019 11:30:07 -0700 Subject: [PATCH 141/325] BuildRequires libtirpc-devel needed for RHEL 8 Building against RHEL 8 requires libtirpc-devel, as with fedora 28. Add rhel8 and centos8 options to the test, to account for that. 
BuildRequires Originally added for fedora 28 via commit 1a62a305be01972ef1b81469134faa4937836096 Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Olaf Faaland Closes #9289 --- rpm/generic/zfs.spec.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 4fdf7bb69ec..b9ca5ed5fb7 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -139,7 +139,7 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel -%if 0%{?fedora} >= 28 +%if 0%{?fedora} >= 28 || 0%{?rhel} >= 8 || 0%{?centos} >= 8 BuildRequires: libtirpc-devel %endif Requires: openssl From 97d4986214e2f1a003f60a931bb6c9dafdead7bf Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 10 Sep 2019 13:42:30 -0700 Subject: [PATCH 142/325] Fix /etc/hostid on root pool deadlock Accidentally introduced by dc04a8c which now takes the SCL_VDEV lock as a reader in zfs_blkptr_verify(). A deadlock can occur if the /etc/hostid file resides on a dataset in the same pool. This is because reading the /etc/hostid file may occur while the caller is holding the SCL_VDEV lock as a writer. For example, to perform a `zpool attach` as shown in the abbreviated stack below. To resolve the issue we cache the system's hostid when initializing the spa_t, or when modifying the multihost property. The cached value is then relied upon for subsequent accesses. Call Trace: spa_config_enter+0x1e8/0x350 [zfs] zfs_blkptr_verify+0x33c/0x4f0 [zfs] <--- trying read lock zio_read+0x6c/0x140 [zfs] ... vfs_read+0xfc/0x1e0 kernel_read+0x50/0x90 ... 
spa_get_hostid+0x1c/0x38 [zfs] spa_config_generate+0x1a0/0x610 [zfs] vdev_label_init+0xa0/0xc80 [zfs] vdev_create+0x98/0xe0 [zfs] spa_vdev_attach+0x14c/0xb40 [zfs] <--- grabbed write lock Reviewed-by: loli10K Signed-off-by: Brian Behlendorf Closes #9256 Closes #9285 --- include/sys/spa.h | 2 +- include/sys/spa_impl.h | 1 + module/zfs/spa.c | 15 ++-- module/zfs/spa_config.c | 2 +- module/zfs/spa_misc.c | 19 +--- tests/runfiles/linux.run | 2 +- .../tests/functional/mmp/Makefile.am | 1 + .../tests/functional/mmp/mmp_hostid.ksh | 90 +++++++++++++++++++ 8 files changed, 109 insertions(+), 23 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh diff --git a/include/sys/spa.h b/include/sys/spa.h index 23434edbc72..ca63d3a4905 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1104,7 +1104,7 @@ extern uint64_t spa_missing_tvds_allowed(spa_t *spa); extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing); extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa); extern boolean_t spa_multihost(spa_t *spa); -extern unsigned long spa_get_hostid(void); +extern uint32_t spa_get_hostid(spa_t *spa); extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); extern int spa_mode(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 0de8613d3eb..9ab107599fd 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -395,6 +395,7 @@ struct spa { mmp_thread_t spa_mmp; /* multihost mmp thread */ list_t spa_leaf_list; /* list of leaf vdevs */ uint64_t spa_leaf_list_gen; /* track leaf_list changes */ + uint32_t spa_hostid; /* cached system hostid */ /* * spa_refcount & spa_config_lock must be the last elements diff --git a/module/zfs/spa.c b/module/zfs/spa.c index ce622cee88b..4e322e34b08 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -567,8 +567,13 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (!error && intval > 1) error = SET_ERROR(EINVAL); - if (!error && !spa_get_hostid()) - 
error = SET_ERROR(ENOTSUP); + if (!error) { + uint32_t hostid = zone_get_hostid(NULL); + if (hostid) + spa->spa_hostid = hostid; + else + error = SET_ERROR(ENOTSUP); + } break; @@ -2496,7 +2501,7 @@ spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label, if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); - if (hostid == spa_get_hostid()) + if (hostid == spa_get_hostid(spa)) return (B_FALSE); /* @@ -3015,7 +3020,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) spa->spa_config); if (activity_check) { if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && - spa_get_hostid() == 0) { + spa_get_hostid(spa) == 0) { nvlist_free(label); fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); @@ -3695,7 +3700,7 @@ spa_ld_load_vdev_metadata(spa_t *spa) * be imported when the system hostid is zero. The exception to * this rule is zdb which is always allowed to access pools. */ - if (spa_multihost(spa) && spa_get_hostid() == 0 && + if (spa_multihost(spa) && spa_get_hostid(spa) == 0 && (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 6c0894338e2..8c7c14999da 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -457,7 +457,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment); - hostid = spa_get_hostid(); + hostid = spa_get_hostid(spa); if (hostid != 0) fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index a111a9e4e61..185b7020148 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -658,6 +658,7 @@ spa_add(const char *name, nvlist_t *config, const 
char *altroot) spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; spa->spa_trust_config = B_TRUE; + spa->spa_hostid = zone_get_hostid(NULL); spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); @@ -2540,22 +2541,10 @@ spa_multihost(spa_t *spa) return (spa->spa_multihost ? B_TRUE : B_FALSE); } -unsigned long -spa_get_hostid(void) +uint32_t +spa_get_hostid(spa_t *spa) { - unsigned long myhostid; - -#ifdef _KERNEL - myhostid = zone_get_hostid(NULL); -#else /* _KERNEL */ - /* - * We're emulating the system's hostid in userland, so - * we can't use zone_get_hostid(). - */ - (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); -#endif /* _KERNEL */ - - return (myhostid); + return (spa->spa_hostid); } boolean_t diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 0e157cf0e98..ff98661ec79 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -657,7 +657,7 @@ tags = ['functional', 'mmap'] tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', 'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history', - 'mmp_on_zdb', 'mmp_write_distribution'] + 'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid'] tags = ['functional', 'mmp'] [tests/functional/mount] diff --git a/tests/zfs-tests/tests/functional/mmp/Makefile.am b/tests/zfs-tests/tests/functional/mmp/Makefile.am index e39a0a5aac8..2848fd4ce69 100644 --- a/tests/zfs-tests/tests/functional/mmp/Makefile.am +++ b/tests/zfs-tests/tests/functional/mmp/Makefile.am @@ -12,6 +12,7 @@ dist_pkgdata_SCRIPTS = \ mmp_reset_interval.ksh \ mmp_on_zdb.ksh \ mmp_write_distribution.ksh \ + mmp_hostid.ksh \ setup.ksh \ cleanup.ksh diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh new file mode 100755 index 00000000000..b492b1070ca --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Verify the hostid file can reside on a ZFS dataset. +# +# STRATEGY: +# 1. Create a non-redundant pool +# 2. Create an 'etc' dataset containing a valid hostid file +# 3. Create a file so the pool will have some contents +# 4. Verify multihost cannot be enabled until the /etc/hostid is linked +# 5. Verify vdevs may be attached and detached +# 6. Verify normal, cache, log and special vdevs can be added +# 7. Verify normal, cache, and log vdevs can be removed +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must rm $MMP_DIR/file.{0,1,2,3,4,5} + log_must rmdir $MMP_DIR + log_must mmp_clear_hostid +} + +log_assert "Verify hostid file can reside on a ZFS dataset" +log_onexit cleanup + +log_must mkdir -p $MMP_DIR +log_must truncate -s $MINVDEVSIZE $MMP_DIR/file.{0,1,2,3,4,5} + +# 1. Create a non-redundant pool +log_must zpool create $MMP_POOL $MMP_DIR/file.0 + +# 2. Create an 'etc' dataset containing a valid hostid file; caching is +# disabled on the dataset to force the hostid to be read from disk. 
+log_must zfs create -o primarycache=none -o secondarycache=none $MMP_POOL/etc +mntpnt_etc=$(get_prop mountpoint $MMP_POOL/etc) +log_must mmp_set_hostid $HOSTID1 +log_must mv $HOSTID_FILE $mntpnt_etc/hostid + +# 3. Create a file so the pool will have some contents +log_must zfs create $MMP_POOL/fs +mntpnt_fs=$(get_prop mountpoint $MMP_POOL/fs) +log_must mkfile 1M $fs_mntpnt/file + +# 4. Verify multihost cannot be enabled until the /etc/hostid is linked +log_mustnot zpool set multihost=on $MMP_POOL +log_must ln -s $mntpnt_etc/hostid $HOSTID_FILE +log_must zpool set multihost=on $MMP_POOL + +# 5. Verify vdevs may be attached and detached +log_must zpool attach $MMP_POOL $MMP_DIR/file.0 $MMP_DIR/file.1 +log_must zpool detach $MMP_POOL $MMP_DIR/file.1 + +# 6. Verify normal, cache, log and special vdevs can be added +log_must zpool add $MMP_POOL $MMP_DIR/file.1 +log_must zpool add $MMP_POOL $MMP_DIR/file.2 +log_must zpool add $MMP_POOL cache $MMP_DIR/file.3 +log_must zpool add $MMP_POOL log $MMP_DIR/file.4 +log_must zpool add $MMP_POOL special $MMP_DIR/file.5 + +# 7. Verify normal, cache, and log vdevs can be removed +log_must zpool remove $MMP_POOL $MMP_DIR/file.2 +log_must zpool remove $MMP_POOL $MMP_DIR/file.3 +log_must zpool remove $MMP_POOL $MMP_DIR/file.4 + +log_pass "Verify hostid file can reside on a ZFS dataset." From e17445d1f70600c22cd319765c0e403d5f9d5024 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 11 Sep 2019 11:14:50 -0700 Subject: [PATCH 143/325] kmodtool: depmod path Determine the location of depmod on the system, either /sbin/depmod or /usr/sbin/depmod. Then use that path when generating the specfile. Additionally, update the Requires lines to reference the package which provides depmod rather than the binary itself. For CentOS/RHEL 7+8 and all supported Fedora releases this is the kmod package, and for CentOS/RHEL 6 it is the module-init-tools package. 
Reviewed-by: Minh Diep Signed-off-by: Olaf Faaland Signed-off-by: Brian Behlendorf Closes #8724 Closes #9310 --- scripts/kmodtool | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/scripts/kmodtool b/scripts/kmodtool index a632dd046b5..9298d6d27df 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -144,7 +144,13 @@ print_rpmtemplate_per_kmodpkg () local kernel_uname_r=${1} local kernel_variant="${2:+-${2}}" - # first part + # Detect depmod install location + local depmod_path=/sbin/depmod + if [ ! -f ${depmod_path} ]; then + depmod_path=/usr/sbin/depmod + fi + + # first part cat <= %{?epoch:%{epoch}:}%{version} -Requires(post): ${prefix}/sbin/depmod -Requires(postun): ${prefix}/sbin/depmod + +%if 0%{?rhel} == 6 || 0%{?centos} == 6 +Requires(post): module-init-tools +Requires(postun): module-init-tools +%else +Requires(post): kmod +Requires(postun): kmod +%endif EOF if [[ ${obsolete_name} ]]; then @@ -170,17 +182,17 @@ BuildRequires: kernel-devel-uname-r = ${kernel_uname_r} %{?KmodsRequires:Requires: %{KmodsRequires}-uname-r = ${kernel_uname_r}} %{?KmodsRequires:BuildRequires: %{KmodsRequires}-uname-r = ${kernel_uname_r}} %post -n kmod-${kmodname}-${kernel_uname_r} -${prefix}/sbin/depmod -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || : +${prefix}${depmod_path} -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -${prefix}/sbin/depmod -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || : +${prefix}${depmod_path} -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || : EOF else cat < /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}${depmod_path} -a > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}/sbin/depmod -a > /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && 
${prefix}${depmod_path} -a > /dev/null || : EOF fi From 9fa8b5b55b44f1d860b05587bff1dccd896cb77b Mon Sep 17 00:00:00 2001 From: Chengfei ZHu Date: Fri, 13 Sep 2019 04:33:44 +0800 Subject: [PATCH 144/325] QAT related bug fixes 1. Fix issue: Kernel BUG with QAT during decompression #9276. Now it is uninterruptible for a specific given QAT request, but Ctrl-C interrupt still works in user-space process. 2. Copy the digest result to the buffer only when doing encryption, and vise-versa for decryption. Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Chengfei Zhu Closes #9276 Closes #9303 --- module/zfs/qat.c | 2 +- module/zfs/qat.h | 5 ----- module/zfs/qat_compress.c | 14 +++----------- module/zfs/qat_crypt.c | 29 ++++++++++++++--------------- 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/module/zfs/qat.c b/module/zfs/qat.c index a6f024cb44d..08613b3a204 100644 --- a/module/zfs/qat.c +++ b/module/zfs/qat.c @@ -21,7 +21,7 @@ #if defined(_KERNEL) && defined(HAVE_QAT) #include -#include "qat.h" +#include qat_stats_t qat_stats = { { "comp_requests", KSTAT_DATA_UINT64 }, diff --git a/module/zfs/qat.h b/module/zfs/qat.h index 9014c03148b..5c1cd15d09d 100644 --- a/module/zfs/qat.h +++ b/module/zfs/qat.h @@ -40,11 +40,6 @@ typedef enum qat_encrypt_dir { #include "dc/cpa_dc.h" #include "lac/cpa_cy_sym.h" -/* - * Timeout - no response from hardware after 0.5 seconds - */ -#define QAT_TIMEOUT_MS 500 - /* * The minimal and maximal buffer size which are not restricted * in the QAT hardware, but with the input buffer size between 4KB diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index b3c8c162167..46ccb997a3b 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -28,7 +28,7 @@ #include #include #include -#include "qat.h" +#include /* * Max instances in a QAT device, each instance is a channel to submit @@ -404,11 +404,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, } /* we now wait 
until the completion of the operation. */ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + wait_for_completion(&complete); if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; @@ -463,11 +459,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, } /* we now wait until the completion of the operation. */ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + wait_for_completion(&complete); if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c index 2170366df14..1e77f143e3e 100644 --- a/module/zfs/qat_crypt.c +++ b/module/zfs/qat_crypt.c @@ -36,7 +36,7 @@ #include #include "lac/cpa_cy_im.h" #include "lac/cpa_cy_common.h" -#include "qat.h" +#include /* * Max instances in a QAT device, each instance is a channel to submit @@ -415,6 +415,9 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, op_data.messageLenToCipherInBytes = enc_len; op_data.ivLenInBytes = ZIO_DATA_IV_LEN; bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN); + /* if dir is QAT_DECRYPT, copy digest_buf to pDigestResult */ + if (dir == QAT_DECRYPT) + bcopy(digest_buf, op_data.pDigestResult, ZIO_DATA_MAC_LEN); cb.verify_result = CPA_FALSE; init_completion(&cb.complete); @@ -423,23 +426,21 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, if (status != CPA_STATUS_SUCCESS) goto fail; - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + /* we now wait until the completion of the operation. 
*/ + wait_for_completion(&cb.complete); if (cb.verify_result == CPA_FALSE) { status = CPA_STATUS_FAIL; goto fail; } - /* save digest result to digest_buf */ - bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); - if (dir == QAT_ENCRYPT) + if (dir == QAT_ENCRYPT) { + /* if dir is QAT_ENCRYPT, save pDigestResult to digest_buf */ + bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); QAT_STAT_INCR(encrypt_total_out_bytes, enc_len); - else + } else { QAT_STAT_INCR(decrypt_total_out_bytes, enc_len); + } fail: if (status != CPA_STATUS_SUCCESS) @@ -549,11 +550,9 @@ qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) if (status != CPA_STATUS_SUCCESS) goto fail; - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + /* we now wait until the completion of the operation. */ + wait_for_completion(&cb.complete); + if (cb.verify_result == CPA_FALSE) { status = CPA_STATUS_FAIL; goto fail; From 63d8f57fe794dadc629c430470545b636665c1b6 Mon Sep 17 00:00:00 2001 From: loli10K Date: Sat, 14 Sep 2019 03:09:59 +0200 Subject: [PATCH 145/325] Scrubbing root pools may deadlock on kernels without elevator_change() (#9321) Originally the zfs_vdev_elevator module option was added as a convenience so the requested elevator would be automatically set on the underlying block devices. At the time this was simple because the kernel provided an API function which did exactly this. This API was then removed in the Linux 4.12 kernel which prompted us to add compatibly code to set the elevator via a usermodehelper. Unfortunately changing the evelator via usermodehelper requires reading some userland binaries, most notably modprobe(8) or sh(1), from a zfs dataset on systems with root-on-zfs. 
This can deadlock the system if used during the following call path because it may need, if the data is not already cached in the ARC, reading directly from disk while holding the spa config lock as a writer: zfs_ioc_pool_scan() -> spa_scan() -> spa_scan() -> vdev_reopen() -> vdev_elevator_switch() -> call_usermodehelper() While the usermodehelper waits sh(1), modprobe(8) is blocked in the ZIO pipeline trying to read from disk: INFO: task modprobe:2650 blocked for more than 10 seconds. Tainted: P OE 5.2.14 modprobe D 0 2650 206 0x00000000 Call Trace: ? __schedule+0x244/0x5f0 schedule+0x2f/0xa0 cv_wait_common+0x156/0x290 [spl] ? do_wait_intr_irq+0xb0/0xb0 spa_config_enter+0x13b/0x1e0 [zfs] zio_vdev_io_start+0x51d/0x590 [zfs] ? tsd_get_by_thread+0x3b/0x80 [spl] zio_nowait+0x142/0x2f0 [zfs] arc_read+0xb2d/0x19d0 [zfs] ... zpl_iter_read+0xfa/0x170 [zfs] new_sync_read+0x124/0x1b0 vfs_read+0x91/0x140 ksys_read+0x59/0xd0 do_syscall_64+0x4f/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This commit changes how we use the usermodehelper functionality from synchronous (UMH_WAIT_PROC) to asynchronous (UMH_NO_WAIT) which prevents scrubs, and other vdev_elevator_switch() consumers, from triggering the aforementioned issue. 
Signed-off-by: Brian Behlendorf Signed-off-by: loli10K Issue #8664 Closes #9321 --- module/zfs/vdev_disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 1686ddfce77..46437f21fb7 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -220,7 +220,7 @@ vdev_elevator_switch(vdev_t *v, char *elevator) char *envp[] = { NULL }; argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); - error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + error = call_usermodehelper(argv[0], argv, envp, UMH_NO_WAIT); strfree(argv[2]); #endif /* HAVE_ELEVATOR_CHANGE */ if (error) { From 12a78fbb4fcbba6c4c8d9b0aa34d23e33107b0ae Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 18 Sep 2019 19:04:45 +0300 Subject: [PATCH 146/325] Fix dsl_scan_ds_clone_swapped logic The was incorrect with respect to swapping dataset IDs both in the on-disk ZAP object and the in-memory queue. In both cases, if ds1 was already present, then it would be first replaced with ds2 and then ds would be replaced back with ds1. Also, both cases did not properly handle a situation where both ds1 and ds2 are already queued. A duplicate insertion would be attempted and its failure would result in a panic. Reviewed-by: Matt Ahrens Reviewed-by: Tom Caputi Signed-off-by: Andriy Gapon Closes #9140 Closes #9163 --- module/zfs/dsl_scan.c | 100 +++++++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 31 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 04a439fad5c..9ccb17b7e14 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -2165,16 +2165,17 @@ ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2, } /* - * Called when a parent dataset and its clone are swapped. If we were + * Called when an origin dataset and its clone are swapped. If we were * currently traversing the dataset, we need to switch to traversing the - * newly promoted parent. 
+ * newly promoted clone. */ void dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) { dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; - uint64_t mintxg; + uint64_t mintxg1, mintxg2; + boolean_t ds1_queued, ds2_queued; if (!dsl_scan_is_running(scn)) return; @@ -2182,44 +2183,81 @@ dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); - if (scan_ds_queue_contains(scn, ds1->ds_object, &mintxg)) { - scan_ds_queue_remove(scn, ds1->ds_object); - scan_ds_queue_insert(scn, ds2->ds_object, mintxg); + /* + * Handle the in-memory scan queue. + */ + ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1); + ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2); + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } - if (scan_ds_queue_contains(scn, ds2->ds_object, &mintxg)) { + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * The swapping code below would not handle this case correctly, + * since we can't insert ds2 if it is already there. That's + * because scan_ds_queue_insert() prohibits a duplicate insert + * and panics. 
+ */ + } else if (ds1_queued) { + scan_ds_queue_remove(scn, ds1->ds_object); + scan_ds_queue_insert(scn, ds2->ds_object, mintxg1); + } else if (ds2_queued) { scan_ds_queue_remove(scn, ds2->ds_object); - scan_ds_queue_insert(scn, ds1->ds_object, mintxg); + scan_ds_queue_insert(scn, ds1->ds_object, mintxg2); } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds1->ds_object, &mintxg) == 0) { - int err; - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + /* + * Handle the on-disk scan queue. + * The on-disk state is an out-of-date version of the in-memory state, + * so the in-memory and on-disk values for ds1_queued and ds2_queued may + * be different. Therefore we need to apply the swap logic to the + * on-disk state independently of the in-memory state. + */ + ds1_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0; + ds2_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0; + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * Alternatively, we could check for EEXIST from + * zap_add_int_key() and back out to the original state, but + * that would be more work than checking for this case upfront. 
+ */ + } else if (ds1_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); - err = zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); - VERIFY(err == 0 || err == EEXIST); - if (err == EEXIST) { - /* Both were there to begin with */ - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, - ds1->ds_object, mintxg, tx)); - } + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); - } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds2->ds_object, &mintxg) == 0) { - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + } else if (ds2_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds2->ds_object, From c37fa0d5a86c1ce646fcceecfbb069d3dc1dc36d Mon Sep 17 00:00:00 2001 From: Kody A Kantor Date: Sun, 22 Sep 2019 17:25:39 -0500 Subject: [PATCH 147/325] Disabled resilver_defer feature leads to looping resilvers When a disk is replaced with another on a pool with the resilver_defer feature present, but not enabled the resilver activity restarts during each spa_sync. This patch checks to make sure that the resilver_defer feature is first enabled before requesting a deferred resilver. This was originally fixed in illumos-joyent as OS-7982. 
Reviewed-by: Chris Dunlop Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Reviewed by: Jerry Jelinek Signed-off-by: Kody A Kantor External-issue: illumos-joyent OS-7982 Closes #9299 Closes #9338 --- module/zfs/dsl_scan.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 9ccb17b7e14..202c6e8d8f3 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -23,7 +23,7 @@ * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2017 Datto Inc. - * Copyright 2017 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #include @@ -952,13 +952,16 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) * will find the drives that need to be resilvered * when the machine reboots and start the resilver then. */ - boolean_t resilver_needed = - dsl_scan_clear_deferred(spa->spa_root_vdev, tx); - if (resilver_needed) { - spa_history_log_internal(spa, - "starting deferred resilver", tx, - "errors=%llu", spa_get_errlog_size(spa)); - spa_async_request(spa, SPA_ASYNC_RESILVER); + if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) { + boolean_t resilver_needed = + dsl_scan_clear_deferred(spa->spa_root_vdev, tx); + if (resilver_needed) { + spa_history_log_internal(spa, + "starting deferred resilver", tx, + "errors=%llu", + (u_longlong_t)spa_get_errlog_size(spa)); + spa_async_request(spa, SPA_ASYNC_RESILVER); + } } } From 1222e921c9e3d8f5c693f196435be4604a1187c0 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 23 Aug 2019 15:52:32 -0700 Subject: [PATCH 148/325] Tag zfs-0.8.2 META file and changelog updated. 
Signed-off-by: Tony Hutter --- META | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/META b/META index d9285b7732e..960a2b73ab3 100644 --- a/META +++ b/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 0.8.1 +Version: 0.8.2 Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS on Linux -Linux-Maximum: 5.1 +Linux-Maximum: 5.3 Linux-Minimum: 2.6.32 From bee5738f774dcad8fbd1cfb127613aff73a7476c Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Mon, 10 Jun 2019 11:48:42 -0700 Subject: [PATCH 149/325] make zil max block size tunable We've observed that on some highly fragmented pools, most metaslab allocations are small (~2-8KB), but there are some large, 128K allocations. The large allocations are for ZIL blocks. If there is a lot of fragmentation, the large allocations can be hard to satisfy. The most common impact of this is that we need to check (and thus load) lots of metaslabs from the ZIL allocation code path, causing sync writes to wait for metaslabs to load, which can take a second or more. In the worst case, we may not be able to satisfy the allocation, in which case the ZIL will resort to txg_wait_synced() to ensure the change is on disk. To provide a workaround for this, this change adds a tunable that can reduce the size of ZIL blocks. 
External-issue: DLPX-61719 Reviewed-by: George Wilson Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #8865 --- cmd/ztest/ztest.c | 2 +- include/sys/zil.h | 5 ++- include/sys/zil_impl.h | 29 ++++---------- man/man5/zfs-module-parameters.5 | 12 ++++++ module/zfs/zfs_log.c | 11 +++++- module/zfs/zil.c | 67 +++++++++++++++++++++++++++++--- module/zfs/zvol.c | 2 +- 7 files changed, 96 insertions(+), 32 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 3bf840d88ed..49833a42393 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -1692,7 +1692,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) if (zil_replaying(zd->zd_zilog, tx)) return; - if (lr->lr_length > ZIL_MAX_LOG_DATA) + if (lr->lr_length > zil_max_log_data(zd->zd_zilog)) write_state = WR_INDIRECT; itx = zil_itx_create(TX_WRITE, diff --git a/include/sys/zil.h b/include/sys/zil.h index fb7b38a066f..cfa5e399550 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -515,6 +515,9 @@ extern void zil_set_sync(zilog_t *zilog, uint64_t syncval); extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); +extern uint64_t zil_max_copied_data(zilog_t *zilog); +extern uint64_t zil_max_log_data(zilog_t *zilog); + extern int zil_replay_disable; #ifdef __cplusplus diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h index 174fef33412..d2f4018653a 100644 --- a/include/sys/zil_impl.h +++ b/include/sys/zil_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -209,6 +209,13 @@ struct zilog { uint_t zl_prev_rotor; /* rotor for zl_prev[] */ txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */ uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */ + /* + * Max block size for this ZIL. Note that this can not be changed + * while the ZIL is in use because consumers (ZPL/zvol) need to take + * this into account when deciding between WR_COPIED and WR_NEED_COPY + * (see zil_max_copied_data()). + */ + uint64_t zl_max_block_size; }; typedef struct zil_bp_node { @@ -216,26 +223,6 @@ typedef struct zil_bp_node { avl_node_t zn_node; } zil_bp_node_t; -/* - * Maximum amount of write data that can be put into single log block. - */ -#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ - sizeof (lr_write_t)) - -/* - * Maximum amount of log space we agree to waste to reduce number of - * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%). - */ -#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8) - -/* - * Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY - * as more space efficient if we can't fit at least two log records into - * maximum sized log block. - */ -#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \ - sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t)) - #ifdef __cplusplus } #endif diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 8ad3ce466ce..fa83b44bdf5 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2890,6 +2890,18 @@ value of 100% will create a maximum of one thread per cpu. Default value: \fB100\fR%. .RE +.sp +.ne 2 +.na +\fBzil_maxblocksize\fR (int) +.ad +.RS 12n +This sets the maximum block size used by the ZIL. On very fragmented pools, +lowering this (typically to 36KB) can improve performance. +.sp +Default value: \fB131072\fR (128KB). 
+.RE + .sp .ne 2 .na diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 5966b7612b3..622ce08acd2 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 by Delphix. All rights reserved. + * Copyright (c) 2015, 2018 by Delphix. All rights reserved. */ @@ -541,7 +541,14 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx_wr_state_t wr_state = write_state; ssize_t len = resid; - if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA) + /* + * A WR_COPIED record must fit entirely in one log block. + * Large writes can use WR_NEED_COPY, which the ZIL will + * split into multiple records across several log blocks + * if necessary. + */ + if (wr_state == WR_COPIED && + resid > zil_max_copied_data(zilog)) wr_state = WR_NEED_COPY; else if (wr_state == WR_INDIRECT) len = MIN(blocksize - P2PHASE(off, blocksize), resid); diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 5249a0e9366..98678aa4465 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1423,6 +1423,13 @@ uint64_t zil_block_buckets[] = { UINT64_MAX }; +/* + * Maximum block size used by the ZIL. This is picked up when the ZIL is + * initialized. Otherwise this should not be used directly; see + * zl_max_block_size instead. + */ +int zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE; + /* * Start a log block write and advance to the next log block. * Calls are serialized. 
@@ -1499,9 +1506,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); for (i = 0; zil_blksz > zil_block_buckets[i]; i++) continue; - zil_blksz = zil_block_buckets[i]; - if (zil_blksz == UINT64_MAX) - zil_blksz = SPA_OLD_MAXBLOCKSIZE; + zil_blksz = MIN(zil_block_buckets[i], zilog->zl_max_block_size); zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; for (i = 0; i < ZIL_PREV_BLKS; i++) zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); @@ -1562,13 +1567,47 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) return (nlwb); } +/* + * Maximum amount of write data that can be put into single log block. + */ +uint64_t +zil_max_log_data(zilog_t *zilog) +{ + return (zilog->zl_max_block_size - + sizeof (zil_chain_t) - sizeof (lr_write_t)); +} + +/* + * Maximum amount of log space we agree to waste to reduce number of + * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%). + */ +static inline uint64_t +zil_max_waste_space(zilog_t *zilog) +{ + return (zil_max_log_data(zilog) / 8); +} + +/* + * Maximum amount of write data for WR_COPIED. For correctness, consumers + * must fall back to WR_NEED_COPY if we can't fit the entire record into one + * maximum sized log block, because each WR_COPIED record must fit in a + * single log block. For space efficiency, we want to fit two records into a + * max-sized log block. + */ +uint64_t +zil_max_copied_data(zilog_t *zilog) +{ + return ((zilog->zl_max_block_size - sizeof (zil_chain_t)) / 2 - + sizeof (lr_write_t)); +} + static lwb_t * zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) { lr_t *lrcb, *lrc; lr_write_t *lrwb, *lrw; char *lr_buf; - uint64_t dlen, dnow, lwb_sp, reclen, txg; + uint64_t dlen, dnow, lwb_sp, reclen, txg, max_log_data; ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ASSERT3P(lwb, !=, NULL); @@ -1617,15 +1656,27 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) * For WR_NEED_COPY optimize layout for minimal number of chunks. 
*/ lwb_sp = lwb->lwb_sz - lwb->lwb_nused; + max_log_data = zil_max_log_data(zilog); if (reclen > lwb_sp || (reclen + dlen > lwb_sp && - lwb_sp < ZIL_MAX_WASTE_SPACE && (dlen % ZIL_MAX_LOG_DATA == 0 || - lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) { + lwb_sp < zil_max_waste_space(zilog) && + (dlen % max_log_data == 0 || + lwb_sp < reclen + dlen % max_log_data))) { lwb = zil_lwb_write_issue(zilog, lwb); if (lwb == NULL) return (NULL); zil_lwb_write_open(zilog, lwb); ASSERT(LWB_EMPTY(lwb)); lwb_sp = lwb->lwb_sz - lwb->lwb_nused; + + /* + * There must be enough space in the new, empty log block to + * hold reclen. For WR_COPIED, we need to fit the whole + * record in one block, and reclen is the header size + the + * data size. For WR_NEED_COPY, we can create multiple + * records, splitting the data into multiple blocks, so we + * only need to fit one word of data per block; in this case + * reclen is just the header size (no data). + */ ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp); } @@ -3114,6 +3165,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) zilog->zl_dirty_max_txg = 0; zilog->zl_last_lwb_opened = NULL; zilog->zl_last_lwb_latency = 0; + zilog->zl_max_block_size = zil_maxblocksize; mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zilog->zl_issuer_lock, NULL, MUTEX_DEFAULT, NULL); @@ -3627,5 +3679,8 @@ MODULE_PARM_DESC(zil_nocacheflush, "Disable ZIL cache flushes"); module_param(zil_slog_bulk, ulong, 0644); MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit"); + +module_param(zil_maxblocksize, int, 0644); +MODULE_PARM_DESC(zil_maxblocksize, "Limit in bytes of ZIL log block size"); /* END CSTYLED */ #endif diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 7c7500dbaaf..f74eb28aec8 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -684,7 +684,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, itx_wr_state_t wr_state = write_state; ssize_t len = size; - if (wr_state == 
WR_COPIED && size > ZIL_MAX_COPIED_DATA) + if (wr_state == WR_COPIED && size > zil_max_copied_data(zilog)) wr_state = WR_NEED_COPY; else if (wr_state == WR_INDIRECT) len = MIN(blocksize - P2PHASE(offset, blocksize), size); From 8805abb8fc1e815337ad6934f837812a147e2388 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 11 Jun 2019 09:02:31 -0700 Subject: [PATCH 150/325] single-chunk scatter ABDs can be treated as linear Scatter ABD's are allocated from a number of pages. In contrast to linear ABD's, these pages are disjoint in the kernel's virtual address space, so they can't be accessed as a contiguous buffer. Therefore routines that need a linear buffer (e.g. abd_borrow_buf() and friends) must allocate a separate linear buffer (with zio_buf_alloc()), and copy the contents of the pages to/from the linear buffer. This can have a measurable performance overhead on some workloads. https://github.com/zfsonlinux/zfs/commit/87c25d567fb7969b44c7d8af63990e ("abd_alloc should use scatter for >1K allocations") increased the use of scatter ABD's, specifically switching 1.5K through 4K (inclusive) buffers from linear to scatter. For workloads that access blocks whose compressed sizes are in this range, that commit introduced an additional copy into the read code path. For example, the sequential_reads_arc_cached tests in the test suite were reduced by around 5% (this is doing reads of 8K-logical blocks, compressed to 3K, which are cached in the ARC). This commit treats single-chunk scattered buffers as linear buffers, because they are contiguous in the kernel's virtual address space. All single-page (4K) ABD's can be represented this way. Some multi-page ABD's can also be represented this way, if we were able to allocate a single "chunk" (higher-order "page" which represents a power-of-2 series of physically-contiguous pages). This is often the case for 2-page (8K) ABD's. 
Representing a single-entry scatter ABD as a linear ABD has the performance advantage of avoiding the copy (and allocation) in abd_borrow_buf_copy / abd_return_buf_copy. A performance increase of around 5% has been observed for ARC-cached reads (of small blocks which can take advantage of this), fixing the regression introduced by 87c25d567. Note that this optimization is only possible because all physical memory is always mapped into the kernel's address space. This is not the case for HIGHMEM pages, so the optimization can not be made on 32-bit systems. Reviewed-by: Chunwei Chen Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #8580 --- include/sys/abd.h | 13 ++++- module/zfs/abd.c | 120 ++++++++++++++++++++++++++++++---------------- module/zfs/arc.c | 19 +++++--- module/zfs/zio.c | 6 --- 4 files changed, 103 insertions(+), 55 deletions(-) diff --git a/include/sys/abd.h b/include/sys/abd.h index 3d9fdbf102a..b781be4da70 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2014 by Chunwei Chen. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright (c) 2016, 2019 by Delphix. All rights reserved. */ #ifndef _ABD_H @@ -44,7 +44,8 @@ typedef enum abd_flags { ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */ ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */ ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */ - ABD_FLAG_MULTI_CHUNK = 1 << 4 /* pages split over multiple chunks */ + ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */ + ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */ } abd_flags_t; typedef struct abd { @@ -60,6 +61,7 @@ typedef struct abd { } abd_scatter; struct abd_linear { void *abd_buf; + struct scatterlist *abd_sgl; /* for LINEAR_PAGE */ } abd_linear; } abd_u; } abd_t; @@ -75,6 +77,13 @@ abd_is_linear(abd_t *abd) return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? 
B_TRUE : B_FALSE); } +static inline boolean_t +abd_is_linear_page(abd_t *abd) +{ + return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ? + B_TRUE : B_FALSE); +} + /* * Allocations and deallocations */ diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 32b2c842c0d..8b2514404a8 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -72,17 +72,19 @@ * (2) Fragmentation is less of an issue since when we are at the limit of * allocatable space, we won't have to search around for a long free * hole in the VA space for large ARC allocations. Each chunk is mapped in - * individually, so even if we weren't using segkpm (see next point) we + * individually, so even if we are using HIGHMEM (see next point) we * wouldn't need to worry about finding a contiguous address range. * - * (3) Use of segkpm will avoid the need for map / unmap / TLB shootdown costs - * on each ABD access. (If segkpm isn't available then we use all linear - * ABDs to avoid this penalty.) See seg_kpm.c for more details. + * (3) If we are not using HIGHMEM, then all physical memory is always + * mapped into the kernel's address space, so we also avoid the map / + * unmap costs on each ABD access. + * + * If we are not using HIGHMEM, scattered buffers which have only one chunk + * can be treated as linear buffers, because they are contiguous in the + * kernel's virtual address space. See abd_alloc_pages() for details. * * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to - * B_FALSE. However, it is not possible to use scattered ABDs if segkpm is not - * available, which is the case on all 32-bit systems and any 64-bit systems - * where kpm_enable is turned off. + * B_FALSE. 
* * In addition to directly allocating a linear or scattered ABD, it is also * possible to create an ABD by requesting the "sub-ABD" starting at an offset @@ -249,18 +251,6 @@ abd_chunkcnt_for_bytes(size_t size) #define __GFP_RECLAIM __GFP_WAIT #endif -static unsigned long -abd_alloc_chunk(int nid, gfp_t gfp, unsigned int order) -{ - struct page *page; - - page = alloc_pages_node(nid, gfp, order); - if (!page) - return (0); - - return ((unsigned long) page_address(page)); -} - /* * The goal is to minimize fragmentation by preferentially populating ABDs * with higher order compound pages from a single zone. Allocation size is @@ -283,19 +273,18 @@ abd_alloc_pages(abd_t *abd, size_t size) size_t remaining_size; int nid = NUMA_NO_NODE; int alloc_pages = 0; - int order; INIT_LIST_HEAD(&pages); while (alloc_pages < nr_pages) { - unsigned long paddr; unsigned chunk_pages; + int order; order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order); chunk_pages = (1U << order); - paddr = abd_alloc_chunk(nid, order ? gfp_comp : gfp, order); - if (paddr == 0) { + page = alloc_pages_node(nid, order ? gfp_comp : gfp, order); + if (page == NULL) { if (order == 0) { ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry); schedule_timeout_interruptible(1); @@ -305,7 +294,6 @@ abd_alloc_pages(abd_t *abd, size_t size) continue; } - page = virt_to_page(paddr); list_add_tail(&page->lru, &pages); if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid)) @@ -336,7 +324,41 @@ abd_alloc_pages(abd_t *abd, size_t size) list_del(&page->lru); } - if (chunks > 1) { + /* + * These conditions ensure that a possible transformation to a linear + * ABD would be valid. + */ + ASSERT(!PageHighMem(sg_page(table.sgl))); + ASSERT0(ABD_SCATTER(abd).abd_offset); + + if (table.nents == 1) { + /* + * Since there is only one entry, this ABD can be represented + * as a linear buffer. All single-page (4K) ABD's can be + * represented this way. 
Some multi-page ABD's can also be + * represented this way, if we were able to allocate a single + * "chunk" (higher-order "page" which represents a power-of-2 + * series of physically-contiguous pages). This is often the + * case for 2-page (8K) ABD's. + * + * Representing a single-entry scatter ABD as a linear ABD + * has the performance advantage of avoiding the copy (and + * allocation) in abd_borrow_buf_copy / abd_return_buf_copy. + * A performance increase of around 5% has been observed for + * ARC-cached reads (of small blocks which can take advantage + * of this). + * + * Note that this optimization is only possible because the + * pages are always mapped into the kernel's address space. + * This is not the case for highmem pages, so the + * optimization can not be made there. + */ + abd->abd_flags |= ABD_FLAG_LINEAR; + abd->abd_flags |= ABD_FLAG_LINEAR_PAGE; + abd->abd_u.abd_linear.abd_sgl = table.sgl; + abd->abd_u.abd_linear.abd_buf = + page_address(sg_page(table.sgl)); + } else if (table.nents > 1) { ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); abd->abd_flags |= ABD_FLAG_MULTI_CHUNK; @@ -344,10 +366,10 @@ abd_alloc_pages(abd_t *abd, size_t size) ABDSTAT_BUMP(abdstat_scatter_page_multi_zone); abd->abd_flags |= ABD_FLAG_MULTI_ZONE; } - } - ABD_SCATTER(abd).abd_sgl = table.sgl; - ABD_SCATTER(abd).abd_nents = table.nents; + ABD_SCATTER(abd).abd_sgl = table.sgl; + ABD_SCATTER(abd).abd_nents = table.nents; + } } #else /* @@ -427,10 +449,6 @@ abd_free_pages(abd_t *abd) struct page; -#define kpm_enable 1 -#define abd_alloc_chunk(o) \ - ((struct page *)umem_alloc_aligned(PAGESIZE << (o), 64, KM_SLEEP)) -#define abd_free_chunk(chunk, o) umem_free(chunk, PAGESIZE << (o)) #define zfs_kmap_atomic(chunk, km) ((void *)chunk) #define zfs_kunmap_atomic(addr, km) do { (void)(addr); } while (0) #define local_irq_save(flags) do { (void)(flags); } while (0) @@ -491,7 +509,7 @@ abd_alloc_pages(abd_t *abd, size_t size) sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages); 
abd_for_each_sg(abd, sg, nr_pages, i) { - struct page *p = abd_alloc_chunk(0); + struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); sg_set_page(sg, p, PAGESIZE, 0); } ABD_SCATTER(abd).abd_nents = nr_pages; @@ -502,12 +520,11 @@ abd_free_pages(abd_t *abd) { int i, n = ABD_SCATTER(abd).abd_nents; struct scatterlist *sg; - int j; abd_for_each_sg(abd, sg, n, i) { - for (j = 0; j < sg->length; j += PAGESIZE) { - struct page *p = nth_page(sg_page(sg), j>>PAGE_SHIFT); - abd_free_chunk(p, 0); + for (int j = 0; j < sg->length; j += PAGESIZE) { + struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT); + umem_free(p, PAGESIZE); } } @@ -560,7 +577,7 @@ abd_verify(abd_t *abd) ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | - ABD_FLAG_MULTI_CHUNK)); + ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE)); IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); if (abd_is_linear(abd)) { @@ -613,6 +630,7 @@ abd_alloc(size_t size, boolean_t is_metadata) abd_t *abd = abd_alloc_struct(); abd->abd_flags = ABD_FLAG_OWNER; + abd->abd_u.abd_scatter.abd_offset = 0; abd_alloc_pages(abd, size); if (is_metadata) { @@ -622,8 +640,6 @@ abd_alloc(size_t size, boolean_t is_metadata) abd->abd_parent = NULL; zfs_refcount_create(&abd->abd_children); - abd->abd_u.abd_scatter.abd_offset = 0; - ABDSTAT_BUMP(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, size); ABDSTAT_INCR(abdstat_scatter_chunk_waste, @@ -681,6 +697,17 @@ abd_alloc_linear(size_t size, boolean_t is_metadata) static void abd_free_linear(abd_t *abd) { + if (abd_is_linear_page(abd)) { + /* Transform it back into a scatter ABD for freeing */ + struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; + abd->abd_flags &= ~ABD_FLAG_LINEAR; + abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; + ABD_SCATTER(abd).abd_nents = 1; + 
ABD_SCATTER(abd).abd_offset = 0; + ABD_SCATTER(abd).abd_sgl = sg; + abd_free_scatter(abd); + return; + } if (abd->abd_flags & ABD_FLAG_META) { zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); } else { @@ -718,7 +745,8 @@ abd_t * abd_alloc_sametype(abd_t *sabd, size_t size) { boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; - if (abd_is_linear(sabd)) { + if (abd_is_linear(sabd) && + !abd_is_linear_page(sabd)) { return (abd_alloc_linear(size, is_metadata)); } else { return (abd_alloc(size, is_metadata)); @@ -966,6 +994,16 @@ abd_release_ownership_of_buf(abd_t *abd) { ASSERT(abd_is_linear(abd)); ASSERT(abd->abd_flags & ABD_FLAG_OWNER); + + /* + * abd_free() needs to handle LINEAR_PAGE ABD's specially. + * Since that flag does not survive the + * abd_release_ownership_of_buf() -> abd_get_from_buf() -> + * abd_take_ownership_of_buf() sequence, we don't allow releasing + * these "linear but not zio_[data_]buf_alloc()'ed" ABD's. + */ + ASSERT(!abd_is_linear_page(abd)); + abd_verify(abd); abd->abd_flags &= ~ABD_FLAG_OWNER; diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 53a44bdaf44..dd382b065a4 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -2917,7 +2917,8 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb, /* * If the hdr's data can be shared then we share the data buffer and * set the appropriate bit in the hdr's b_flags to indicate the hdr is - * allocate a new buffer to store the buf's data. + * sharing it's b_pabd with the arc_buf_t. Otherwise, we allocate a new + * buffer to store the buf's data. * * There are two additional restrictions here because we're sharing * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be @@ -2925,10 +2926,17 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb, * an arc_write() then the hdr's data buffer will be released when the * write completes, even though the L2ARC write might still be using it. 
* Second, the hdr's ABD must be linear so that the buf's user doesn't - * need to be ABD-aware. - */ - boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) && - hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd); + * need to be ABD-aware. It must be allocated via + * zio_[data_]buf_alloc(), not as a page, because we need to be able + * to abd_release_ownership_of_buf(), which isn't allowed on "linear + * page" buffers because the ABD code needs to handle freeing them + * specially. + */ + boolean_t can_share = arc_can_share(hdr, buf) && + !HDR_L2_WRITING(hdr) && + hdr->b_l1hdr.b_pabd != NULL && + abd_is_linear(hdr->b_l1hdr.b_pabd) && + !abd_is_linear_page(hdr->b_l1hdr.b_pabd); /* Set up b_data and sharing */ if (can_share) { @@ -3731,7 +3739,6 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, * disk, it's easiest if we just set up sharing between the * buf and the hdr. */ - ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); arc_hdr_free_abd(hdr, B_FALSE); arc_share_buf(hdr, buf); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 94eaa5888a9..f1bf377047a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -345,12 +345,6 @@ zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize, { zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); - /* - * Ensure that anyone expecting this zio to contain a linear ABD isn't - * going to get a nasty surprise when they try to access the data. 
- */ - IMPLY(abd_is_linear(zio->io_abd), abd_is_linear(data)); - zt->zt_orig_abd = zio->io_abd; zt->zt_orig_size = zio->io_size; zt->zt_bufsize = bufsize; From cbb91549586839720131b41b026c9b84be1ddc72 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 13 Jun 2019 13:06:15 -0700 Subject: [PATCH 151/325] looping in metaslab_block_picker impacts performance on fragmented pools On fragmented pools with high-performance storage, the looping in metaslab_block_picker() can become the performance-limiting bottleneck. When looking for a larger block (e.g. a 128K block for the ZIL), we may search through many free segments (up to hundreds of thousands) to find one that is large enough to satisfy the allocation. This can take a long time (up to dozens of ms), and is done while holding the ms_lock, which other threads may spin waiting for. When this performance problem is encountered, profiling will show high CPU time in metaslab_block_picker, as well as in mutex_enter from various callers. The problem is very evident on a test system with a sync write workload with 8K writes to a recordsize=8k filesystem, with 4TB of SSD storage, 84% full and 88% fragmented. It has also been observed on production systems with 90TB of storage, 76% full and 87% fragmented. The fix is to change metaslab_df_alloc() to search only up to 16MB from the previous allocation (of this alignment). After that, we will pick a segment that is of the exact size requested (or larger). This reduces the number of iterations to a few hundred on fragmented pools (a ~100x improvement). 
Reviewed-by: Brian Behlendorf Reviewed-by: Paul Dagnelie Reviewed-by: Tony Nguyen Reviewed-by: George Wilson Reviewed-by: Serapheim Dimitropoulos Signed-off-by: Matthew Ahrens External-issue: DLPX-62324 Closes #8877 --- man/man5/zfs-module-parameters.5 | 34 ++++++++ module/zfs/metaslab.c | 143 ++++++++++++++++++------------- 2 files changed, 117 insertions(+), 60 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index fa83b44bdf5..77b4c2801e0 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -325,6 +325,40 @@ Enable use of the fragmentation metric in computing metaslab weights. Use \fB1\fR for yes (default) and \fB0\fR for no. .RE +.sp +.ne 2 +.na +\fBmetaslab_df_max_search\fR (int) +.ad +.RS 12n +Maximum distance to search forward from the last offset. Without this limit, +fragmented pools can see >100,000 iterations and metaslab_block_picker() +becomes the performance limiting factor on high-performance storage. + +With the default setting of 16MB, we typically see less than 500 iterations, +even with very fragmented, ashift=9 pools. The maximum number of iterations +possible is: \fBmetaslab_df_max_search / (2 * (1<100,000 iterations and + * metaslab_block_picker() becomes the performance limiting factor on + * high-performance storage. + * + * With the default setting of 16MB, we typically see less than 500 + * iterations, even with very fragmented, ashift=9 pools. The maximum number + * of iterations possible is: + * metaslab_df_max_search / (2 * (1<rs_start, align); + if (rs != NULL) + first_found = rs->rs_start; + while (rs != NULL && rs->rs_start - first_found <= max_search) { + uint64_t offset = rs->rs_start; if (offset + size <= rs->rs_end) { *cursor = offset + size; return (offset); @@ -1224,55 +1250,30 @@ metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, rs = AVL_NEXT(t, rs); } - /* - * If we know we've searched the whole map (*cursor == 0), give up. 
- * Otherwise, reset the cursor to the beginning and try again. - */ - if (*cursor == 0) - return (-1ULL); - *cursor = 0; - return (metaslab_block_picker(t, cursor, size, align)); -} -#endif /* WITH_FF/DF/CF_BLOCK_ALLOCATOR */ - -#if defined(WITH_FF_BLOCK_ALLOCATOR) -/* - * ========================================================================== - * The first-fit block allocator - * ========================================================================== - */ -static uint64_t -metaslab_ff_alloc(metaslab_t *msp, uint64_t size) -{ - /* - * Find the largest power of 2 block size that evenly divides the - * requested size. This is used to try to allocate blocks with similar - * alignment from the same area of the metaslab (i.e. same cursor - * bucket) but it does not guarantee that other allocations sizes - * may exist in the same region. - */ - uint64_t align = size & -size; - uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1]; - avl_tree_t *t = &msp->ms_allocatable->rt_root; - - return (metaslab_block_picker(t, cursor, size, align)); + return (-1ULL); } - -static metaslab_ops_t metaslab_ff_ops = { - metaslab_ff_alloc -}; - -metaslab_ops_t *zfs_metaslab_ops = &metaslab_ff_ops; -#endif /* WITH_FF_BLOCK_ALLOCATOR */ +#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */ #if defined(WITH_DF_BLOCK_ALLOCATOR) /* * ========================================================================== - * Dynamic block allocator - - * Uses the first fit allocation scheme until space get low and then - * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold - * and metaslab_df_free_pct to determine when to switch the allocation scheme. + * Dynamic Fit (df) block allocator + * + * Search for a free chunk of at least this size, starting from the last + * offset (for this alignment of block) looking for up to + * metaslab_df_max_search bytes (16MB). 
If a large enough free chunk is not + * found within 16MB, then return a free chunk of exactly the requested size (or + * larger). + * + * If it seems like searching from the last offset will be unproductive, skip + * that and just return a free chunk of exactly the requested size (or larger). + * This is based on metaslab_df_alloc_threshold and metaslab_df_free_pct. This + * mechanism is probably not very useful and may be removed in the future. + * + * The behavior when not searching can be changed to return the largest free + * chunk, instead of a free chunk of exactly the requested size, by setting + * metaslab_df_use_largest_segment. * ========================================================================== */ static uint64_t @@ -1288,28 +1289,42 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) uint64_t align = size & -size; uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1]; range_tree_t *rt = msp->ms_allocatable; - avl_tree_t *t = &rt->rt_root; - uint64_t max_size = metaslab_block_maxsize(msp); int free_pct = range_tree_space(rt) * 100 / msp->ms_size; + uint64_t offset; ASSERT(MUTEX_HELD(&msp->ms_lock)); - ASSERT3U(avl_numnodes(t), ==, + ASSERT3U(avl_numnodes(&rt->rt_root), ==, avl_numnodes(&msp->ms_allocatable_by_size)); - if (max_size < size) - return (-1ULL); - /* - * If we're running low on space switch to using the size - * sorted AVL tree (best-fit). + * If we're running low on space, find a segment based on size, + * rather than iterating based on offset. 
*/ - if (max_size < metaslab_df_alloc_threshold || + if (metaslab_block_maxsize(msp) < metaslab_df_alloc_threshold || free_pct < metaslab_df_free_pct) { - t = &msp->ms_allocatable_by_size; - *cursor = 0; + offset = -1; + } else { + offset = metaslab_block_picker(&rt->rt_root, + cursor, size, metaslab_df_max_search); } - return (metaslab_block_picker(t, cursor, size, 1ULL)); + if (offset == -1) { + range_seg_t *rs; + if (metaslab_df_use_largest_segment) { + /* use largest free segment */ + rs = avl_last(&msp->ms_allocatable_by_size); + } else { + /* use segment of this size, or next largest */ + rs = metaslab_block_find(&msp->ms_allocatable_by_size, + 0, size); + } + if (rs != NULL && rs->rs_start + size <= rs->rs_end) { + offset = rs->rs_start; + *cursor = offset + size; + } + } + + return (offset); } static metaslab_ops_t metaslab_df_ops = { @@ -4823,6 +4838,14 @@ MODULE_PARM_DESC(zfs_metaslab_switch_threshold, module_param(metaslab_force_ganging, ulong, 0644); MODULE_PARM_DESC(metaslab_force_ganging, "blocks larger than this size are forced to be gang blocks"); + +module_param(metaslab_df_max_search, int, 0644); +MODULE_PARM_DESC(metaslab_df_max_search, + "max distance (bytes) to search forward before using size tree"); + +module_param(metaslab_df_use_largest_segment, int, 0644); +MODULE_PARM_DESC(metaslab_df_use_largest_segment, + "when looking in size tree, use largest segment instead of exact fit"); /* END CSTYLED */ #endif From e625030c119e7e64e4fae596658b3153b5022e10 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Sat, 22 Jun 2019 16:51:46 -0700 Subject: [PATCH 152/325] OpenZFS 9425 - channel programs can be interrupted Problem Statement ================= ZFS Channel program scripts currently require a timeout, so that hung or long-running scripts return a timeout error instead of causing ZFS to get wedged. This limit can currently be set up to 100 million Lua instructions. 
Even with a limit in place, it would be desirable to have a sys admin (support engineer) be able to cancel a script that is taking a long time. Proposed Solution ================= Make it possible to abort a channel program by sending an interrupt signal. In the underlying txg_wait_sync function, switch the cv_wait to a cv_wait_sig to catch the signal. Once a signal is encountered, the dsl_sync_task function can install a Lua hook that will get called before the Lua interpreter executes a new line of code. The dsl_sync_task can resume with a standard txg_wait_sync call and wait for the txg to complete. Meanwhile, the hook will abort the script and indicate that the channel program was canceled. The kernel returns an EINTR to indicate that the channel program run was canceled. Porting notes: Added missing return value from cv_wait_sig() Authored by: Don Brady Reviewed by: Sebastien Roy Reviewed by: Serapheim Dimitropoulos Reviewed by: Matt Ahrens Reviewed by: Sara Hartse Reviewed by: Brian Behlendorf Approved by: Robert Mustacchi Ported-by: Don Brady Signed-off-by: Don Brady OpenZFS-issue: https://www.illumos.org/issues/9425 OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/d0cb1fb926 Closes #8904 --- include/spl/sys/condvar.h | 4 +- include/sys/dsl_synctask.h | 3 + include/sys/txg.h | 5 + include/sys/zcp.h | 31 +++ include/sys/zfs_context.h | 3 +- lib/libzpool/kernel.c | 7 + module/spl/spl-condvar.c | 19 +- module/zfs/dsl_synctask.c | 24 ++- module/zfs/txg.c | 36 +++- module/zfs/zcp.c | 185 ++++++++++-------- tests/runfiles/linux.run | 2 +- .../channel_program/synctask_core/Makefile.am | 3 +- .../synctask_core/tst.terminate_by_signal.ksh | 98 ++++++++++ 13 files changed, 323 insertions(+), 97 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh diff --git a/include/spl/sys/condvar.h b/include/spl/sys/condvar.h index 28caea57181..f1438c4e245 100644 --- a/include/spl/sys/condvar.h +++ 
b/include/spl/sys/condvar.h @@ -54,7 +54,8 @@ extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *); extern void __cv_destroy(kcondvar_t *); extern void __cv_wait(kcondvar_t *, kmutex_t *); extern void __cv_wait_io(kcondvar_t *, kmutex_t *); -extern void __cv_wait_sig(kcondvar_t *, kmutex_t *); +extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *); +extern int __cv_wait_sig(kcondvar_t *, kmutex_t *); extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t); extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t); extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t); @@ -69,6 +70,7 @@ extern void __cv_broadcast(kcondvar_t *c); #define cv_destroy(cvp) __cv_destroy(cvp) #define cv_wait(cvp, mp) __cv_wait(cvp, mp) #define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp) +#define cv_wait_io_sig(cvp, mp) __cv_wait_io_sig(cvp, mp) #define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp) #define cv_wait_interruptible(cvp, mp) cv_wait_sig(cvp, mp) #define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t) diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h index da6c7a40dac..957963ffe55 100644 --- a/include/sys/dsl_synctask.h +++ b/include/sys/dsl_synctask.h @@ -37,6 +37,7 @@ struct dsl_pool; typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *); typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *); +typedef void (dsl_sigfunc_t)(void *, dmu_tx_t *); typedef enum zfs_space_check { /* @@ -116,6 +117,8 @@ int dsl_early_sync_task(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *, void *, int, zfs_space_check_t); void dsl_early_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *, void *, int, zfs_space_check_t, dmu_tx_t *); +int dsl_sync_task_sig(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *, + dsl_sigfunc_t *, void *, int, zfs_space_check_t); #ifdef __cplusplus } diff --git a/include/sys/txg.h b/include/sys/txg.h index 760d5208bf4..260a3b43cfe 100644 --- a/include/sys/txg.h +++ b/include/sys/txg.h @@ -87,6 +87,11 @@ extern void 
txg_kick(struct dsl_pool *dp); */ extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg); +/* + * Wait as above. Returns true if the thread was signaled while waiting. + */ +extern boolean_t txg_wait_synced_sig(struct dsl_pool *dp, uint64_t txg); + /* * Wait until the given transaction group, or one after it, is * the open transaction group. Try to make this happen as soon diff --git a/include/sys/zcp.h b/include/sys/zcp.h index b9c8ef0069f..b720d863779 100644 --- a/include/sys/zcp.h +++ b/include/sys/zcp.h @@ -52,6 +52,12 @@ typedef struct zcp_cleanup_handler { list_node_t zch_node; } zcp_cleanup_handler_t; +typedef struct zcp_alloc_arg { + boolean_t aa_must_succeed; + int64_t aa_alloc_remaining; + int64_t aa_alloc_limit; +} zcp_alloc_arg_t; + typedef struct zcp_run_info { dsl_pool_t *zri_pool; @@ -93,6 +99,11 @@ typedef struct zcp_run_info { */ boolean_t zri_timed_out; + /* + * Channel program was canceled by user + */ + boolean_t zri_canceled; + /* * Boolean indicating whether or not we are running in syncing * context. @@ -104,6 +115,26 @@ typedef struct zcp_run_info { * triggered in the event of a fatal error. */ list_t zri_cleanup_handlers; + + /* + * The Lua state context of our channel program. + */ + lua_State *zri_state; + + /* + * Lua memory allocator arguments. + */ + zcp_alloc_arg_t *zri_allocargs; + + /* + * Contains output values from zcp script or error string. + */ + nvlist_t *zri_outnvl; + + /* + * The errno number returned to caller of zcp_eval(). 
+ */ + int zri_result; } zcp_run_info_t; zcp_run_info_t *zcp_run_info(lua_State *); diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 598b86a7a65..def9de78146 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -307,6 +307,7 @@ typedef pthread_cond_t kcondvar_t; extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); +extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp); extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag); @@ -315,8 +316,8 @@ extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at) -#define cv_wait_sig(cv, mp) cv_wait(cv, mp) #define cv_wait_io(cv, mp) cv_wait(cv, mp) +#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp) #define cv_timedwait_sig_hires(cv, mp, t, r, f) \ cv_timedwait_hires(cv, mp, t, r, f) diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 0f39e0d72bc..da172449c73 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -339,6 +339,13 @@ cv_wait(kcondvar_t *cv, kmutex_t *mp) mp->m_owner = pthread_self(); } +int +cv_wait_sig(kcondvar_t *cv, kmutex_t *mp) +{ + cv_wait(cv, mp); + return (1); +} + clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index a7a9d1db9a9..19c575f770b 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -29,6 +29,12 @@ #include #include +#include + +#ifdef HAVE_SCHED_SIGNAL_HEADER +#include +#endif + void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) { @@ -144,10 +150,21 @@ __cv_wait_io(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_io); -void +int +__cv_wait_io_sig(kcondvar_t 
*cvp, kmutex_t *mp) +{ + cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1); + + return (signal_pending(current) ? 0 : 1); +} +EXPORT_SYMBOL(__cv_wait_io_sig); + +int __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp) { cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0); + + return (signal_pending(current) ? 0 : 1); } EXPORT_SYMBOL(__cv_wait_sig); diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index b63ce5cad90..b225eed37d4 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -41,7 +41,7 @@ dsl_null_checkfunc(void *arg, dmu_tx_t *tx) static int dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc, - dsl_syncfunc_t *syncfunc, void *arg, + dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, int blocks_modified, zfs_space_check_t space_check, boolean_t early) { spa_t *spa; @@ -85,6 +85,11 @@ dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc, dmu_tx_commit(tx); + if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) { + /* current contract is to call func once */ + sigfunc(arg, tx); + sigfunc = NULL; /* in case we're performing an EAGAIN retry */ + } txg_wait_synced(dp, dst.dst_txg); if (dst.dst_error == EAGAIN) { @@ -124,7 +129,7 @@ dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check) { - return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg, + return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg, blocks_modified, space_check, B_FALSE)); } @@ -146,10 +151,23 @@ dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check) { - return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg, + return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg, blocks_modified, space_check, B_TRUE)); } +/* + * A standard synctask that can be interrupted from a signal. 
The sigfunc + * is called once if a signal occurred while waiting for the task to sync. + */ +int +dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, + int blocks_modified, zfs_space_check_t space_check) +{ + return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg, + blocks_modified, space_check, B_FALSE)); +} + static void dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx, diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 0fcd569e3b4..d1fb50188e4 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -675,8 +675,8 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) mutex_exit(&tx->tx_sync_lock); } -void -txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +static boolean_t +txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig) { tx_state_t *tx = &dp->dp_tx; @@ -695,9 +695,39 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg) "tx_synced=%llu waiting=%llu dp=%p\n", tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); cv_broadcast(&tx->tx_sync_more_cv); - cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + if (wait_sig) { + /* + * Condition wait here but stop if the thread receives a + * signal. The caller may call txg_wait_synced*() again + * to resume waiting for this txg. + */ + if (cv_wait_io_sig(&tx->tx_sync_done_cv, + &tx->tx_sync_lock) == 0) { + mutex_exit(&tx->tx_sync_lock); + return (B_TRUE); + } + } else { + cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + } } mutex_exit(&tx->tx_sync_lock); + return (B_FALSE); +} + +void +txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +{ + VERIFY0(txg_wait_synced_impl(dp, txg, B_FALSE)); +} + +/* + * Similar to a txg_wait_synced but it can be interrupted from a signal. + * Returns B_TRUE if the thread was signaled while waiting. 
+ */ +boolean_t +txg_wait_synced_sig(dsl_pool_t *dp, uint64_t txg) +{ + return (txg_wait_synced_impl(dp, txg, B_TRUE)); } /* diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 4894df11d5f..1aeea131449 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -118,21 +118,6 @@ static int zcp_nvpair_value_to_lua(lua_State *, nvpair_t *, char *, int); static int zcp_lua_to_nvlist_impl(lua_State *, int, nvlist_t *, const char *, int); -typedef struct zcp_alloc_arg { - boolean_t aa_must_succeed; - int64_t aa_alloc_remaining; - int64_t aa_alloc_limit; -} zcp_alloc_arg_t; - -typedef struct zcp_eval_arg { - lua_State *ea_state; - zcp_alloc_arg_t *ea_allocargs; - cred_t *ea_cred; - nvlist_t *ea_outnvl; - int ea_result; - uint64_t ea_instrlimit; -} zcp_eval_arg_t; - /* * The outer-most error callback handler for use with lua_pcall(). On * error Lua will call this callback with a single argument that @@ -452,7 +437,7 @@ zcp_lua_to_nvlist_helper(lua_State *state) static void zcp_convert_return_values(lua_State *state, nvlist_t *nvl, - const char *key, zcp_eval_arg_t *evalargs) + const char *key, int *result) { int err; VERIFY3U(1, ==, lua_gettop(state)); @@ -464,7 +449,7 @@ zcp_convert_return_values(lua_State *state, nvlist_t *nvl, err = lua_pcall(state, 3, 0, 0); /* zcp_lua_to_nvlist_helper */ if (err != 0) { zcp_lua_to_nvlist(state, 1, nvl, ZCP_RET_ERROR); - evalargs->ea_result = SET_ERROR(ECHRNG); + *result = SET_ERROR(ECHRNG); } } @@ -791,19 +776,32 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize) static void zcp_lua_counthook(lua_State *state, lua_Debug *ar) { - /* - * If we're called, check how many instructions the channel program has - * executed so far, and compare against the limit. 
- */ lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); zcp_run_info_t *ri = lua_touserdata(state, -1); + /* + * Check if we were canceled while waiting for the + * txg to sync or from our open context thread + */ + if (ri->zri_canceled || + (!ri->zri_sync && issig(JUSTLOOKING) && issig(FORREAL))) { + ri->zri_canceled = B_TRUE; + (void) lua_pushstring(state, "Channel program was canceled."); + (void) lua_error(state); + /* Unreachable */ + } + + /* + * Check how many instructions the channel program has + * executed so far, and compare against the limit. + */ ri->zri_curinstrs += zfs_lua_check_instrlimit_interval; if (ri->zri_maxinstrs != 0 && ri->zri_curinstrs > ri->zri_maxinstrs) { ri->zri_timed_out = B_TRUE; (void) lua_pushstring(state, "Channel program timed out."); (void) lua_error(state); + /* Unreachable */ } } @@ -816,31 +814,25 @@ zcp_panic_cb(lua_State *state) } static void -zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) +zcp_eval_impl(dmu_tx_t *tx, zcp_run_info_t *ri) { int err; - zcp_run_info_t ri; - lua_State *state = evalargs->ea_state; + lua_State *state = ri->zri_state; VERIFY3U(3, ==, lua_gettop(state)); + /* finish initializing our runtime state */ + ri->zri_pool = dmu_tx_pool(tx); + ri->zri_tx = tx; + list_create(&ri->zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), + offsetof(zcp_cleanup_handler_t, zch_node)); + /* * Store the zcp_run_info_t struct for this run in the Lua registry. * Registry entries are not directly accessible by the Lua scripts but * can be accessed by our callbacks. 
*/ - ri.zri_space_used = 0; - ri.zri_pool = dmu_tx_pool(tx); - ri.zri_cred = evalargs->ea_cred; - ri.zri_tx = tx; - ri.zri_timed_out = B_FALSE; - ri.zri_sync = sync; - list_create(&ri.zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), - offsetof(zcp_cleanup_handler_t, zch_node)); - ri.zri_curinstrs = 0; - ri.zri_maxinstrs = evalargs->ea_instrlimit; - - lua_pushlightuserdata(state, &ri); + lua_pushlightuserdata(state, ri); lua_setfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); VERIFY3U(3, ==, lua_gettop(state)); @@ -857,7 +849,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * off control to the channel program. Channel programs that use too * much memory should die with ENOSPC. */ - evalargs->ea_allocargs->aa_must_succeed = B_FALSE; + ri->zri_allocargs->aa_must_succeed = B_FALSE; /* * Call the Lua function that open-context passed us. This pops the @@ -869,14 +861,14 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) /* * Let Lua use KM_SLEEP while we interpret the return values. */ - evalargs->ea_allocargs->aa_must_succeed = B_TRUE; + ri->zri_allocargs->aa_must_succeed = B_TRUE; /* * Remove the error handler callback from the stack. At this point, * there shouldn't be any cleanup handler registered in the handler * list (zri_cleanup_handlers), regardless of whether it ran or not. 
*/ - list_destroy(&ri.zri_cleanup_handlers); + list_destroy(&ri->zri_cleanup_handlers); lua_remove(state, 1); switch (err) { @@ -896,16 +888,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) int return_count = lua_gettop(state); if (return_count == 1) { - evalargs->ea_result = 0; - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_RETURN, evalargs); + ri->zri_result = 0; + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_RETURN, &ri->zri_result); } else if (return_count > 1) { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); lua_settop(state, 0); (void) lua_pushfstring(state, "Multiple return " "values not supported"); - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); } break; } @@ -919,19 +911,20 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * stack. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); - if (evalargs->ea_result == ETIME && - evalargs->ea_outnvl != NULL) { - (void) nvlist_add_uint64(evalargs->ea_outnvl, - ZCP_ARG_INSTRLIMIT, ri.zri_curinstrs); + if (ri->zri_result == ETIME && ri->zri_outnvl != NULL) { + (void) nvlist_add_uint64(ri->zri_outnvl, + ZCP_ARG_INSTRLIMIT, ri->zri_curinstrs); } break; } @@ -943,14 +936,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * return the error message. 
*/ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); break; } case LUA_ERRMEM: @@ -958,7 +953,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * Lua ran out of memory while running the channel program. * There's not much we can do. */ - evalargs->ea_result = SET_ERROR(ENOSPC); + ri->zri_result = SET_ERROR(ENOSPC); break; default: VERIFY0(err); @@ -966,21 +961,35 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) } static void -zcp_pool_error(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_pool_error(zcp_run_info_t *ri, const char *poolname) { - evalargs->ea_result = SET_ERROR(ECHRNG); - lua_settop(evalargs->ea_state, 0); - (void) lua_pushfstring(evalargs->ea_state, "Could not open pool: %s", + ri->zri_result = SET_ERROR(ECHRNG); + lua_settop(ri->zri_state, 0); + (void) lua_pushfstring(ri->zri_state, "Could not open pool: %s", poolname); - zcp_convert_return_values(evalargs->ea_state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(ri->zri_state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); + +} + +/* + * This callback is called when txg_wait_synced_sig encountered a signal. + * The txg_wait_synced_sig will continue to wait for the txg to complete + * after calling this callback. 
+ */ +/* ARGSUSED */ +static void +zcp_eval_sig(void *arg, dmu_tx_t *tx) +{ + zcp_run_info_t *ri = arg; + ri->zri_canceled = B_TRUE; } static void zcp_eval_sync(void *arg, dmu_tx_t *tx) { - zcp_eval_arg_t *evalargs = arg; + zcp_run_info_t *ri = arg; /* * Open context should have setup the stack to contain: @@ -988,15 +997,14 @@ zcp_eval_sync(void *arg, dmu_tx_t *tx) * 2: Script to run (converted to a Lua function) * 3: nvlist input to function (converted to Lua table or nil) */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); - zcp_eval_impl(tx, B_TRUE, evalargs); + zcp_eval_impl(tx, ri); } static void -zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_eval_open(zcp_run_info_t *ri, const char *poolname) { - int error; dsl_pool_t *dp; dmu_tx_t *tx; @@ -1004,11 +1012,11 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) /* * See comment from the same assertion in zcp_eval_sync(). */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); error = dsl_pool_hold(poolname, FTAG, &dp); if (error != 0) { - zcp_pool_error(evalargs, poolname); + zcp_pool_error(ri, poolname); return; } @@ -1023,7 +1031,7 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) */ tx = dmu_tx_create_dd(dp->dp_mos_dir); - zcp_eval_impl(tx, B_FALSE, evalargs); + zcp_eval_impl(tx, ri); dmu_tx_abort(tx); @@ -1036,7 +1044,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, { int err; lua_State *state; - zcp_eval_arg_t evalargs; + zcp_run_info_t runinfo; if (instrlimit > zfs_lua_max_instrlimit) return (SET_ERROR(EINVAL)); @@ -1136,24 +1144,29 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, } VERIFY3U(3, ==, lua_gettop(state)); - evalargs.ea_state = state; - evalargs.ea_allocargs = &allocargs; - evalargs.ea_instrlimit = instrlimit; - evalargs.ea_cred = CRED(); - evalargs.ea_outnvl = outnvl; - evalargs.ea_result = 0; + 
runinfo.zri_state = state; + runinfo.zri_allocargs = &allocargs; + runinfo.zri_outnvl = outnvl; + runinfo.zri_result = 0; + runinfo.zri_cred = CRED(); + runinfo.zri_timed_out = B_FALSE; + runinfo.zri_canceled = B_FALSE; + runinfo.zri_sync = sync; + runinfo.zri_space_used = 0; + runinfo.zri_curinstrs = 0; + runinfo.zri_maxinstrs = instrlimit; if (sync) { - err = dsl_sync_task(poolname, NULL, - zcp_eval_sync, &evalargs, 0, ZFS_SPACE_CHECK_ZCP_EVAL); + err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync, + zcp_eval_sig, &runinfo, 0, ZFS_SPACE_CHECK_ZCP_EVAL); if (err != 0) - zcp_pool_error(&evalargs, poolname); + zcp_pool_error(&runinfo, poolname); } else { - zcp_eval_open(&evalargs, poolname); + zcp_eval_open(&runinfo, poolname); } lua_close(state); - return (evalargs.ea_result); + return (runinfo.zri_result); } /* diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index ff98661ec79..5b3c58e0016 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -87,7 +87,7 @@ tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit', 'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict', 'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult', 'tst.rollback_one', 'tst.snapshot_destroy', 'tst.snapshot_neg', - 'tst.snapshot_recursive', 'tst.snapshot_simple'] + 'tst.snapshot_recursive', 'tst.snapshot_simple', 'tst.terminate_by_signal'] tags = ['functional', 'channel_program', 'synctask_core'] [tests/functional/chattr] diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am b/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am index 7bdaf53de2f..cc86a2db919 100644 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am @@ -27,7 +27,8 @@ dist_pkgdata_SCRIPTS = \ tst.snapshot_destroy.ksh \ tst.snapshot_neg.ksh \ tst.snapshot_recursive.ksh \ - 
tst.snapshot_simple.ksh + tst.snapshot_simple.ksh \ + tst.terminate_by_signal.ksh dist_pkgdata_DATA = \ tst.get_index_props.out \ diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh new file mode 100755 index 00000000000..6f58cc1f4f8 --- /dev/null +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# +. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib + +# +# DESCRIPTION: Execute a long-running zfs channel program and attempt to +# cancel it by sending a signal. +# + +verify_runnable "global" + +rootfs=$TESTPOOL/$TESTFS +snapname=snap +limit=50000000 + +function cleanup +{ + datasetexists $rootfs && log_must zfs destroy -R $rootfs +} + +log_onexit cleanup + +# +# Create a working set of 100 file systems +# +for i in {1..100}; do + log_must zfs create "$rootfs/child$i" +done + +# +# Attempt to create 100 snapshots with zfs.sync.snapshot() along with some +# time consuming efforts. We use loops of zfs.check.* (dry run operations) +# to consume instructions before the next zfs.sync.snapshot() occurs. +# +# Without a signal interruption this ZCP would take several minutes and +# generate over 30 million Lua instructions. +# +function chan_prog +{ +zfs program -t $limit $TESTPOOL - $rootfs $snapname <<-EOF + arg = ... 
+ fs = arg["argv"][1] + snap = arg["argv"][2] + for child in zfs.list.children(fs) do + local snapname = child .. "@" .. snap + zfs.check.snapshot(snapname) + zfs.sync.snapshot(snapname) + for i=1,20000,1 do + zfs.check.snapshot(snapname) + zfs.check.destroy(snapname) + zfs.check.destroy(fs) + end + end + return "should not have reached here" +EOF +} + +log_note "Executing a long-running zfs program in the background" +chan_prog & +CHILD=$! + +# +# After waiting, send a kill signal to the channel program process. +# This should stop the ZCP near a million instructions but still have +# created some of the snapshots. Note that since the above zfs program +# command might get wrapped, we also issue a kill to the group. +# +sleep 10 +log_pos pkill -P $CHILD +log_pos kill $CHILD + +# +# Make sure the channel program did not fully complete by enforcing +# that not all of the snapshots were created. +# +snap_count=$(zfs list -t snapshot | grep $TESTPOOL | wc -l) +log_note "$snap_count snapshots created by ZCP" + +if [ "$snap_count" -eq 0 ]; then + log_fail "Channel progam failed to run." +elif [ "$snap_count" -gt 50 ]; then + log_fail "Too many snapshots after a cancel ($snap_count)." +else + log_pass "Canceling a long-running channel program works." +fi From d47ee5ad1c554dd197366b3951922b9cc42b5804 Mon Sep 17 00:00:00 2001 From: loli10K Date: Tue, 25 Jun 2019 03:02:17 +0200 Subject: [PATCH 153/325] Fix bp_embedded_type enum definition With the addition of BP_EMBEDDED_TYPE_REDACTED in 30af21b0 a couple of codepaths make wrong assumptions and could potentially result in errors. 
Reviewed-by: Brian Behlendorf Reviewed-by: Chris Dunlop Reviewed-by: Paul Dagnelie Signed-off-by: loli10K Closes #8951 Conflicts: include/sys/spa.h --- include/sys/spa.h | 4 ++-- module/zfs/zio.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sys/spa.h b/include/sys/spa.h index ca63d3a4905..d43801de5dd 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -401,8 +401,8 @@ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { BP_EMBEDDED_TYPE_DATA, - BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ - NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED + BP_EMBEDDED_TYPE_RESERVED, /* Reserved for Delphix byteswap feature. */ + NUM_BP_EMBEDDED_TYPES } bp_embedded_type_t; #define BPE_NUM_WORDS 14 diff --git a/module/zfs/zio.c b/module/zfs/zio.c index f1bf377047a..5638f531938 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -908,7 +908,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) } if (BP_IS_EMBEDDED(bp)) { - if (BPE_GET_ETYPE(bp) > NUM_BP_EMBEDDED_TYPES) { + if (BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES) { zfs_panic_recover("blkptr at %p has invalid ETYPE %llu", bp, (longlong_t)BPE_GET_ETYPE(bp)); } From 350646563fe7203cbc185acd25c12e16ea43f613 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 26 Jun 2019 11:00:12 -0700 Subject: [PATCH 154/325] Concurrent small allocation defeats large allocation With the new parallel allocators scheme, there is a possibility for a problem where two threads, allocating from the same allocator at the same time, conflict with each other. There are two primary cases to worry about. First, another thread working on another allocator activates the same metaslab that the first thread was trying to activate. This results in the first thread needing to go back and reselect a new metaslab, even though it may have waited a long time for this metaslab to load. 
Second, another thread working on the same allocator may have activated a different metaslab while the first thread was waiting for its metaslab to load. Both of these cases can cause the first thread to be significantly delayed in issuing its IOs. The second case can also cause metaslab load/unload churn; because the metaslab is loaded but not fully activated, we never set the selected_txg, which results in the metaslab being immediately unloaded again. This process can repeat many times, wasting disk and cpu resources. This is more likely to happen when the IO of the first thread is a larger one (like a ZIL write) and the other thread is doing a smaller write, because it is more likely to find an acceptable metaslab quickly. There are two primary changes. The first is to always proceed with the allocation when returning from metaslab_activate if we were preempted in either of the ways described in the previous section. The second change is to set the selected_txg before we do the call to activate so that even if the metaslab is not used for an allocation, we won't immediately attempt to unload it. 
Reviewed by: Jerry Jelinek Reviewed by: Matt Ahrens Reviewed by: Serapheim Dimitropoulos Reviewed by: Brian Behlendorf Signed-off-by: Paul Dagnelie External-issue: DLPX-61314 Closes #8843 --- module/zfs/metaslab.c | 280 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 231 insertions(+), 49 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 92310aaf901..a14057f8913 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -994,8 +994,10 @@ metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) static void metaslab_group_sort_impl(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) { + ASSERT(MUTEX_HELD(&msp->ms_lock)); ASSERT(MUTEX_HELD(&mg->mg_lock)); ASSERT(msp->ms_group == mg); + avl_remove(&mg->mg_metaslab_tree, msp); msp->ms_weight = weight; avl_add(&mg->mg_metaslab_tree, msp); @@ -1794,6 +1796,7 @@ metaslab_unload(metaslab_t *msp) range_tree_vacate(msp->ms_allocatable, NULL, NULL); msp->ms_loaded = B_FALSE; + msp->ms_activation_weight = 0; msp->ms_weight &= ~METASLAB_ACTIVE_MASK; msp->ms_max_size = 0; @@ -2324,11 +2327,10 @@ metaslab_segment_weight(metaslab_t *msp) boolean_t metaslab_should_allocate(metaslab_t *msp, uint64_t asize) { - boolean_t should_allocate; - if (msp->ms_max_size != 0) return (msp->ms_max_size >= asize); + boolean_t should_allocate; if (!WEIGHT_IS_SPACEBASED(msp->ms_weight)) { /* * The metaslab segment weight indicates segments in the @@ -2342,6 +2344,7 @@ metaslab_should_allocate(metaslab_t *msp, uint64_t asize) should_allocate = (asize <= (msp->ms_weight & ~METASLAB_WEIGHT_TYPE)); } + return (should_allocate); } static uint64_t @@ -2389,6 +2392,8 @@ metaslab_weight(metaslab_t *msp) void metaslab_recalculate_weight_and_sort(metaslab_t *msp) { + ASSERT(MUTEX_HELD(&msp->ms_lock)); + /* note: we preserve the mask (e.g. indication of primary, etc..) 
*/ uint64_t was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; metaslab_group_sort(msp->ms_group, msp, @@ -2399,16 +2404,18 @@ static int metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp, int allocator, uint64_t activation_weight) { + ASSERT(MUTEX_HELD(&msp->ms_lock)); + /* * If we're activating for the claim code, we don't want to actually * set the metaslab up for a specific allocator. */ if (activation_weight == METASLAB_WEIGHT_CLAIM) return (0); + metaslab_t **arr = (activation_weight == METASLAB_WEIGHT_PRIMARY ? mg->mg_primaries : mg->mg_secondaries); - ASSERT(MUTEX_HELD(&msp->ms_lock)); mutex_enter(&mg->mg_lock); if (arr[allocator] != NULL) { mutex_exit(&mg->mg_lock); @@ -2429,28 +2436,65 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight) { ASSERT(MUTEX_HELD(&msp->ms_lock)); - if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { - int error = metaslab_load(msp); - if (error != 0) { - metaslab_group_sort(msp->ms_group, msp, 0); - return (error); - } - if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) { - /* - * The metaslab was activated for another allocator - * while we were waiting, we should reselect. - */ + /* + * The current metaslab is already activated for us so there + * is nothing to do. Already activated though, doesn't mean + * that this metaslab is activated for our allocator nor our + * requested activation weight. The metaslab could have started + * as an active one for our allocator but changed allocators + * while we were waiting to grab its ms_lock or we stole it + * [see find_valid_metaslab()]. This means that there is a + * possibility of passivating a metaslab of another allocator + * or from a different activation mask, from this thread. 
+ */ + if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) { + ASSERT(msp->ms_loaded); + return (0); + } + + int error = metaslab_load(msp); + if (error != 0) { + metaslab_group_sort(msp->ms_group, msp, 0); + return (error); + } + + /* + * When entering metaslab_load() we may have dropped the + * ms_lock because we were loading this metaslab, or we + * were waiting for another thread to load it for us. In + * that scenario, we recheck the weight of the metaslab + * to see if it was activated by another thread. + * + * If the metaslab was activated for another allocator or + * it was activated with a different activation weight (e.g. + * we wanted to make it a primary but it was activated as + * secondary) we return error (EBUSY). + * + * If the metaslab was activated for the same allocator + * and requested activation mask, skip activating it. + */ + if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) { + if (msp->ms_allocator != allocator) + return (EBUSY); + + if ((msp->ms_weight & activation_weight) == 0) return (SET_ERROR(EBUSY)); - } - if ((error = metaslab_activate_allocator(msp->ms_group, msp, - allocator, activation_weight)) != 0) { - return (error); - } - msp->ms_activation_weight = msp->ms_weight; - metaslab_group_sort(msp->ms_group, msp, - msp->ms_weight | activation_weight); + EQUIV((activation_weight == METASLAB_WEIGHT_PRIMARY), + msp->ms_primary); + return (0); } + + if ((error = metaslab_activate_allocator(msp->ms_group, msp, + allocator, activation_weight)) != 0) { + return (error); + } + + ASSERT0(msp->ms_activation_weight); + msp->ms_activation_weight = msp->ms_weight; + metaslab_group_sort(msp->ms_group, msp, + msp->ms_weight | activation_weight); + ASSERT(msp->ms_loaded); ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); @@ -2462,6 +2506,8 @@ metaslab_passivate_allocator(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) { ASSERT(MUTEX_HELD(&msp->ms_lock)); + ASSERT(msp->ms_loaded); + if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) { 
metaslab_group_sort(mg, msp, weight); return; @@ -2469,15 +2515,16 @@ metaslab_passivate_allocator(metaslab_group_t *mg, metaslab_t *msp, mutex_enter(&mg->mg_lock); ASSERT3P(msp->ms_group, ==, mg); + ASSERT3S(0, <=, msp->ms_allocator); + ASSERT3U(msp->ms_allocator, <, mg->mg_allocators); + if (msp->ms_primary) { - ASSERT3U(0, <=, msp->ms_allocator); - ASSERT3U(msp->ms_allocator, <, mg->mg_allocators); ASSERT3P(mg->mg_primaries[msp->ms_allocator], ==, msp); ASSERT(msp->ms_weight & METASLAB_WEIGHT_PRIMARY); mg->mg_primaries[msp->ms_allocator] = NULL; } else { - ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY); ASSERT3P(mg->mg_secondaries[msp->ms_allocator], ==, msp); + ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY); mg->mg_secondaries[msp->ms_allocator] = NULL; } msp->ms_allocator = -1; @@ -2500,9 +2547,10 @@ metaslab_passivate(metaslab_t *msp, uint64_t weight) range_tree_space(msp->ms_allocatable) == 0); ASSERT0(weight & METASLAB_ACTIVE_MASK); + ASSERT(msp->ms_activation_weight != 0); msp->ms_activation_weight = 0; metaslab_passivate_allocator(msp->ms_group, msp, weight); - ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); + ASSERT0(msp->ms_weight & METASLAB_ACTIVE_MASK); } /* @@ -3489,6 +3537,41 @@ find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight, return (msp); } +void +metaslab_active_mask_verify(metaslab_t *msp) +{ + ASSERT(MUTEX_HELD(&msp->ms_lock)); + + if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0) + return; + + if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) + return; + + if (msp->ms_weight & METASLAB_WEIGHT_PRIMARY) { + VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY); + VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM); + VERIFY3S(msp->ms_allocator, !=, -1); + VERIFY(msp->ms_primary); + return; + } + + if (msp->ms_weight & METASLAB_WEIGHT_SECONDARY) { + VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY); + VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM); + VERIFY3S(msp->ms_allocator, !=, -1); + VERIFY(!msp->ms_primary); + 
return; + } + + if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) { + VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY); + VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY); + VERIFY3S(msp->ms_allocator, ==, -1); + return; + } +} + /* ARGSUSED */ static uint64_t metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, @@ -3497,9 +3580,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, { metaslab_t *msp = NULL; uint64_t offset = -1ULL; - uint64_t activation_weight; - activation_weight = METASLAB_WEIGHT_PRIMARY; + uint64_t activation_weight = METASLAB_WEIGHT_PRIMARY; for (int i = 0; i < d; i++) { if (activation_weight == METASLAB_WEIGHT_PRIMARY && DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) { @@ -3540,10 +3622,30 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, if (activation_weight == METASLAB_WEIGHT_PRIMARY && mg->mg_primaries[allocator] != NULL) { msp = mg->mg_primaries[allocator]; + + /* + * Even though we don't hold the ms_lock for the + * primary metaslab, those fields should not + * change while we hold the mg_lock. Thus it is + * safe to make assertions on them. + */ + ASSERT(msp->ms_primary); + ASSERT3S(msp->ms_allocator, ==, allocator); + ASSERT(msp->ms_loaded); + was_active = B_TRUE; } else if (activation_weight == METASLAB_WEIGHT_SECONDARY && mg->mg_secondaries[allocator] != NULL) { msp = mg->mg_secondaries[allocator]; + + /* + * See comment above about the similar assertions + * for the primary metaslab. 
+ */ + ASSERT(!msp->ms_primary); + ASSERT3S(msp->ms_allocator, ==, allocator); + ASSERT(msp->ms_loaded); + was_active = B_TRUE; } else { msp = find_valid_metaslab(mg, activation_weight, dva, d, @@ -3556,8 +3658,20 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, kmem_free(search, sizeof (*search)); return (-1ULL); } - mutex_enter(&msp->ms_lock); + + metaslab_active_mask_verify(msp); + + /* + * This code is disabled out because of issues with + * tracepoints in non-gpl kernel modules. + */ +#if 0 + DTRACE_PROBE3(ms__activation__attempt, + metaslab_t *, msp, uint64_t, activation_weight, + boolean_t, was_active); +#endif + /* * Ensure that the metaslab we have selected is still * capable of handling our request. It's possible that @@ -3567,44 +3681,80 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, * a new metaslab. */ if (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK)) { + ASSERT3S(msp->ms_allocator, ==, -1); mutex_exit(&msp->ms_lock); continue; } /* - * If the metaslab is freshly activated for an allocator that - * isn't the one we're allocating from, or if it's a primary and - * we're seeking a secondary (or vice versa), we go back and - * select a new metaslab. + * If the metaslab was activated for another allocator + * while we were waiting in the ms_lock above, or it's + * a primary and we're seeking a secondary (or vice versa), + * we go back and select a new metaslab. */ if (!was_active && (msp->ms_weight & METASLAB_ACTIVE_MASK) && (msp->ms_allocator != -1) && (msp->ms_allocator != allocator || ((activation_weight == METASLAB_WEIGHT_PRIMARY) != msp->ms_primary))) { + ASSERT(msp->ms_loaded); + ASSERT((msp->ms_weight & METASLAB_WEIGHT_CLAIM) || + msp->ms_allocator != -1); mutex_exit(&msp->ms_lock); continue; } + /* + * This metaslab was used for claiming regions allocated + * by the ZIL during pool import. Once these regions are + * claimed we don't need to keep the CLAIM bit set + * anymore. 
Passivate this metaslab to zero its activation + * mask. + */ if (msp->ms_weight & METASLAB_WEIGHT_CLAIM && activation_weight != METASLAB_WEIGHT_CLAIM) { + ASSERT(msp->ms_loaded); + ASSERT3S(msp->ms_allocator, ==, -1); metaslab_passivate(msp, msp->ms_weight & ~METASLAB_WEIGHT_CLAIM); mutex_exit(&msp->ms_lock); continue; } - if (metaslab_activate(msp, allocator, activation_weight) != 0) { + msp->ms_selected_txg = txg; + + int activation_error = + metaslab_activate(msp, allocator, activation_weight); + metaslab_active_mask_verify(msp); + + /* + * If the metaslab was activated by another thread for + * another allocator or activation_weight (EBUSY), or it + * failed because another metaslab was assigned as primary + * for this allocator (EEXIST) we continue using this + * metaslab for our allocation, rather than going on to a + * worse metaslab (we waited for that metaslab to be loaded + * after all). + * + * If the activation failed due to an I/O error we skip to + * the next metaslab. + */ + boolean_t activated; + if (activation_error == 0) { + activated = B_TRUE; + } else if (activation_error == EBUSY || + activation_error == EEXIST) { + activated = B_FALSE; + } else { mutex_exit(&msp->ms_lock); continue; } - - msp->ms_selected_txg = txg; + ASSERT(msp->ms_loaded); /* * Now that we have the lock, recheck to see if we should * continue to use this metaslab for this allocation. The - * the metaslab is now loaded so metaslab_should_allocate() can - * accurately determine if the allocation attempt should + * metaslab is now loaded so metaslab_should_allocate() + * can accurately determine if the allocation attempt should * proceed. 
*/ if (!metaslab_should_allocate(msp, asize)) { @@ -3614,10 +3764,9 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, goto next; } - /* - * If this metaslab is currently condensing then pick again as - * we can't manipulate this metaslab until it's committed + * If this metaslab is currently condensing then pick again + * as we can't manipulate this metaslab until it's committed * to disk. If this metaslab is being initialized, we shouldn't * allocate from it since the allocated region might be * overwritten after allocation. @@ -3625,15 +3774,19 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, if (msp->ms_condensing) { metaslab_trace_add(zal, mg, msp, asize, d, TRACE_CONDENSING, allocator); - metaslab_passivate(msp, msp->ms_weight & - ~METASLAB_ACTIVE_MASK); + if (activated) { + metaslab_passivate(msp, msp->ms_weight & + ~METASLAB_ACTIVE_MASK); + } mutex_exit(&msp->ms_lock); continue; } else if (msp->ms_disabled > 0) { metaslab_trace_add(zal, mg, msp, asize, d, TRACE_DISABLED, allocator); - metaslab_passivate(msp, msp->ms_weight & - ~METASLAB_ACTIVE_MASK); + if (activated) { + metaslab_passivate(msp, msp->ms_weight & + ~METASLAB_ACTIVE_MASK); + } mutex_exit(&msp->ms_lock); continue; } @@ -3643,12 +3796,22 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, if (offset != -1ULL) { /* Proactively passivate the metaslab, if needed */ - metaslab_segment_may_passivate(msp); + if (activated) + metaslab_segment_may_passivate(msp); break; } next: ASSERT(msp->ms_loaded); + /* + * This code is disabled out because of issues with + * tracepoints in non-gpl kernel modules. + */ +#if 0 + DTRACE_PROBE2(ms__alloc__failure, metaslab_t *, msp, + uint64_t, asize); +#endif + /* * We were unable to allocate from this metaslab so determine * a new weight for this metaslab. 
Now that we have loaded @@ -3670,14 +3833,33 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, * currently available for allocation and is accurate * even within a sync pass. */ + uint64_t weight; if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) { - uint64_t weight = metaslab_block_maxsize(msp); + weight = metaslab_block_maxsize(msp); WEIGHT_SET_SPACEBASED(weight); + } else { + weight = metaslab_weight_from_range_tree(msp); + } + + if (activated) { metaslab_passivate(msp, weight); } else { - metaslab_passivate(msp, - metaslab_weight_from_range_tree(msp)); + /* + * For the case where we use the metaslab that is + * active for another allocator we want to make + * sure that we retain the activation mask. + * + * Note that we could attempt to use something like + * metaslab_recalculate_weight_and_sort() that + * retains the activation mask here. That function + * uses metaslab_weight() to set the weight though + * which is not as accurate as the calculations + * above. + */ + weight |= msp->ms_weight & METASLAB_ACTIVE_MASK; + metaslab_group_sort(mg, msp, weight); } + metaslab_active_mask_verify(msp); /* * We have just failed an allocation attempt, check From f3f46b0e45e64d169f0faa08a01fcafafd8d179a Mon Sep 17 00:00:00 2001 From: Mike Gerdts Date: Sun, 30 Jun 2019 23:38:07 +0000 Subject: [PATCH 155/325] OpenZFS 9318 - vol_volsize_to_reservation does not account for raidz skip blocks When a volume is created in a pool with raidz vdevs and volblocksize != 128k, the volume can reference more space than is reserved with the automatically calculated refreservation. There are two deficiencies in vol_volsize_to_reservation that contribute to this: 1) Skip blocks may be added to keep each allocation a multiple of parity + 1. This is the dominating factor when volblocksize is close to 2^ashift. 2) raidz deflation for 128 KB blocks is different for most other block sizes. 
See "The theory of raidz space accounting" comment in libzfs_dataset.c for a full explanation. Authored by: Mike Gerdts Reviewed by: Richard Elling Reviewed by: Sanjay Nadkarni Reviewed by: Jerry Jelinek Reviewed by: Matt Ahrens Reviewed by: Kody Kantor Reviewed-by: Brian Behlendorf Approved by: Dan McDonald Ported-by: Mike Gerdts Porting Notes: * ZTS: wait for zvols to exist before writing * ZTS: use log_must_busy with {zpool|zfs} destroy OpenZFS-issue: https://www.illumos.org/issues/9318 OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/b73ccab0 Closes #8973 --- cmd/zfs/zfs_main.c | 6 +- include/libzfs.h | 5 +- lib/libzfs/libzfs_dataset.c | 189 ++++++++++++++++- tests/runfiles/linux.run | 3 +- .../tests/functional/refreserv/Makefile.am | 4 +- .../refreserv/refreserv_multi_raidz.ksh | 197 ++++++++++++++++++ .../functional/refreserv/refreserv_raidz.ksh | 130 ++++++++++++ 7 files changed, 520 insertions(+), 14 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh create mode 100755 tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 07421605522..224a004d88d 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -29,6 +29,7 @@ * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, loli10K + * Copyright 2019 Joyent, Inc. 
*/ #include @@ -992,10 +993,11 @@ zfs_do_create(int argc, char **argv) zpool_close(zpool_handle); goto error; } - zpool_close(zpool_handle); - volsize = zvol_volsize_to_reservation(volsize, real_props); + volsize = zvol_volsize_to_reservation(zpool_handle, volsize, + real_props); nvlist_free(real_props); + zpool_close(zpool_handle); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { diff --git a/include/libzfs.h b/include/libzfs.h index a5b2a8393f4..fed4eda0074 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright Joyent, Inc. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2016, Intel Corporation. * Copyright 2016 Nexenta Systems, Inc. @@ -687,7 +687,8 @@ extern int zfs_hold(zfs_handle_t *, const char *, const char *, extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); -extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); +extern uint64_t zvol_volsize_to_reservation(zpool_handle_t *, uint64_t, + nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 0d0194e6845..a35855d82fd 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . 
@@ -1618,6 +1618,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) uint64_t new_reservation; zfs_prop_t resv_prop; nvlist_t *props; + zpool_handle_t *zph = zpool_handle(zhp); /* * If this is an existing volume, and someone is setting the volsize, @@ -1632,7 +1633,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); - if ((zvol_volsize_to_reservation(old_volsize, props) != + if ((zvol_volsize_to_reservation(zph, old_volsize, props) != old_reservation) || nvlist_exists(nvl, zfs_prop_to_name(resv_prop))) { fnvlist_free(props); @@ -1643,7 +1644,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) fnvlist_free(props); return (-1); } - new_reservation = zvol_volsize_to_reservation(new_volsize, props); + new_reservation = zvol_volsize_to_reservation(zph, new_volsize, props); fnvlist_free(props); if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), @@ -1698,7 +1699,8 @@ zfs_fix_auto_resv(zfs_handle_t *zhp, nvlist_t *nvl) volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); } - resvsize = zvol_volsize_to_reservation(volsize, props); + resvsize = zvol_volsize_to_reservation(zpool_handle(zhp), volsize, + props); fnvlist_free(props); (void) nvlist_remove_all(nvl, zfs_prop_to_name(prop)); @@ -5363,12 +5365,176 @@ zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) } /* - * Convert the zvol's volume size to an appropriate reservation. + * The theory of raidz space accounting + * + * The "referenced" property of RAIDZ vdevs is scaled such that a 128KB block + * will "reference" 128KB, even though it allocates more than that, to store the + * parity information (and perhaps skip sectors). This concept of the + * "referenced" (and other DMU space accounting) being lower than the allocated + * space by a constant factor is called "raidz deflation." 
+ * + * As mentioned above, the constant factor for raidz deflation assumes a 128KB + * block size. However, zvols typically have a much smaller block size (default + * 8KB). These smaller blocks may require proportionally much more parity + * information (and perhaps skip sectors). In this case, the change to the + * "referenced" property may be much more than the logical block size. + * + * Suppose a raidz vdev has 5 disks with ashift=12. A 128k block may be written + * as follows. + * + * +-------+-------+-------+-------+-------+ + * | disk1 | disk2 | disk3 | disk4 | disk5 | + * +-------+-------+-------+-------+-------+ + * | P0 | D0 | D8 | D16 | D24 | + * | P1 | D1 | D9 | D17 | D25 | + * | P2 | D2 | D10 | D18 | D26 | + * | P3 | D3 | D11 | D19 | D27 | + * | P4 | D4 | D12 | D20 | D28 | + * | P5 | D5 | D13 | D21 | D29 | + * | P6 | D6 | D14 | D22 | D30 | + * | P7 | D7 | D15 | D23 | D31 | + * +-------+-------+-------+-------+-------+ + * + * Above, notice that 160k was allocated: 8 x 4k parity sectors + 32 x 4k data + * sectors. The dataset's referenced will increase by 128k and the pool's + * allocated and free properties will be adjusted by 160k. + * + * A 4k block written to the same raidz vdev will require two 4k sectors. The + * blank cells represent unallocated space. + * + * +-------+-------+-------+-------+-------+ + * | disk1 | disk2 | disk3 | disk4 | disk5 | + * +-------+-------+-------+-------+-------+ + * | P0 | D0 | | | | + * +-------+-------+-------+-------+-------+ + * + * Above, notice that the 4k block required one sector for parity and another + * for data. vdev_raidz_asize() will return 8k and as such the pool's allocated + * and free properties will be adjusted by 8k. The dataset will not be charged + * 8k. Rather, it will be charged a value that is scaled according to the + * overhead of the 128k block on the same vdev. This 8k allocation will be + * charged 8k * 128k / 160k. 
128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as + * calculated in the 128k block example above. + * + * Every raidz allocation is sized to be a multiple of nparity+1 sectors. That + * is, every raidz1 allocation will be a multiple of 2 sectors, raidz2 + * allocations are a multiple of 3 sectors, and raidz3 allocations are a + * multiple of of 4 sectors. When a block does not fill the required number of + * sectors, skip blocks (sectors) are used. + * + * An 8k block being written to a raidz vdev may be written as follows: + * + * +-------+-------+-------+-------+-------+ + * | disk1 | disk2 | disk3 | disk4 | disk5 | + * +-------+-------+-------+-------+-------+ + * | P0 | D0 | D1 | S0 | | + * +-------+-------+-------+-------+-------+ + * + * In order to maintain the nparity+1 allocation size, a skip block (S0) was + * added. For this 8k block, the pool's allocated and free properties are + * adjusted by 16k and the dataset's referenced is increased by 16k * 128k / + * 160k. Again, 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as calculated in + * the 128k block example above. + * + * Compression may lead to a variety of block sizes being written for the same + * volume or file. There is no clear way to reserve just the amount of space + * that will be required, so the worst case (no compression) is assumed. + * Note that metadata blocks will typically be compressed, so the reservation + * size returned by zvol_volsize_to_reservation() will generally be slightly + * larger than the maximum that the volume can reference. + */ + +/* + * Derived from function of same name in module/zfs/vdev_raidz.c. Returns the + * amount of space (in bytes) that will be allocated for the specified block + * size. Note that the "referenced" space accounted will be less than this, but + * not necessarily equal to "blksize", due to RAIDZ deflation. 
+ */ +static uint64_t +vdev_raidz_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift, + uint64_t blksize) +{ + uint64_t asize, ndata; + + ASSERT3U(ndisks, >, nparity); + ndata = ndisks - nparity; + asize = ((blksize - 1) >> ashift) + 1; + asize += nparity * ((asize + ndata - 1) / ndata); + asize = roundup(asize, nparity + 1) << ashift; + + return (asize); +} + +/* + * Determine how much space will be allocated if it lands on the most space- + * inefficient top-level vdev. Returns the size in bytes required to store one + * copy of the volume data. See theory comment above. + */ +static uint64_t +volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize) +{ + nvlist_t *config, *tree, **vdevs; + uint_t nvdevs, v; + uint64_t ret = 0; + + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || + nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, + &vdevs, &nvdevs) != 0) { + return (nblocks * blksize); + } + + for (v = 0; v < nvdevs; v++) { + char *type; + uint64_t nparity, ashift, asize, tsize; + nvlist_t **disks; + uint_t ndisks; + uint64_t volsize; + + if (nvlist_lookup_string(vdevs[v], ZPOOL_CONFIG_TYPE, + &type) != 0 || strcmp(type, VDEV_TYPE_RAIDZ) != 0 || + nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_NPARITY, + &nparity) != 0 || + nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_ASHIFT, + &ashift) != 0 || + nvlist_lookup_nvlist_array(vdevs[v], ZPOOL_CONFIG_CHILDREN, + &disks, &ndisks) != 0) { + continue; + } + + /* allocation size for the "typical" 128k block */ + tsize = vdev_raidz_asize(ndisks, nparity, ashift, + SPA_OLD_MAXBLOCKSIZE); + /* allocation size for the blksize block */ + asize = vdev_raidz_asize(ndisks, nparity, ashift, blksize); + + /* + * Scale this size down as a ratio of 128k / tsize. See theory + * statement above. 
+ */ + volsize = nblocks * asize * SPA_OLD_MAXBLOCKSIZE / tsize; + if (volsize > ret) { + ret = volsize; + } + } + + if (ret == 0) { + ret = nblocks * blksize; + } + + return (ret); +} + +/* + * Convert the zvol's volume size to an appropriate reservation. See theory + * comment above. + * * Note: If this routine is updated, it is necessary to update the ZFS test - * suite's shell version in reservation.kshlib. + * suite's shell version in reservation.shlib. */ uint64_t -zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) +zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize, + nvlist_t *props) { uint64_t numdb; uint64_t nblocks, volblocksize; @@ -5384,7 +5550,14 @@ zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = ZVOL_DEFAULT_BLOCKSIZE; - nblocks = volsize/volblocksize; + + nblocks = volsize / volblocksize; + /* + * Metadata defaults to using 128k blocks, not volblocksize blocks. For + * this reason, only the data blocks are scaled based on vdev config. 
+ */ + volsize = volsize_from_vdevs(zph, nblocks, volblocksize); + /* start with metadnode L0-L6 */ numdb = 7; /* calculate number of indirects */ diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 5b3c58e0016..107b19c32c2 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -755,7 +755,8 @@ tags = ['functional', 'refquota'] [tests/functional/refreserv] tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos', - 'refreserv_004_pos', 'refreserv_005_pos'] + 'refreserv_004_pos', 'refreserv_005_pos', 'refreserv_multi_raidz', + 'refreserv_raidz'] tags = ['functional', 'refreserv'] [tests/functional/removal] diff --git a/tests/zfs-tests/tests/functional/refreserv/Makefile.am b/tests/zfs-tests/tests/functional/refreserv/Makefile.am index 96f25d444e6..bd760a1f069 100644 --- a/tests/zfs-tests/tests/functional/refreserv/Makefile.am +++ b/tests/zfs-tests/tests/functional/refreserv/Makefile.am @@ -6,7 +6,9 @@ dist_pkgdata_SCRIPTS = \ refreserv_002_pos.ksh \ refreserv_003_pos.ksh \ refreserv_004_pos.ksh \ - refreserv_005_pos.ksh + refreserv_005_pos.ksh \ + refreserv_multi_raidz.ksh \ + refreserv_raidz.ksh dist_pkgdata_DATA = \ refreserv.cfg diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh new file mode 100755 index 00000000000..803e391c9ce --- /dev/null +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh @@ -0,0 +1,197 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +. 
$STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/refreserv/refreserv.cfg + +# +# DESCRIPTION: +# raidz refreservation=auto picks worst raidz vdev +# +# STRATEGY: +# 1. Create a pool with a single raidz vdev +# 2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k] +# - create a volume +# - remember its refreservation +# - destroy the volume +# 3. Destroy the pool +# 4. Recreate the pool with one more disk in the vdev, then repeat steps +# 2 and 3. +# +# NOTES: +# 1. This test will use up to 14 disks but can cover the key concepts with +# 5 disks. +# 2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely. +# + +verify_runnable "global" + +typeset -a alldisks=($DISKS) + +# The larger the volsize, the better zvol_volsize_to_reservation() is at +# guessing the right number - though it is horrible with tiny blocks. At 10M on +# ashift=12, the estimate may be over 26% too high. +volsize=100 + +function cleanup +{ + default_cleanup_noexit + default_setup_noexit "${alldisks[0]}" +} + +log_assert "raidz refreservation=auto picks worst raidz vdev" +log_onexit cleanup + +poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + +# Testing tiny block sizes on ashift=12 pools causes so much size inflation +# that small test disks may fill before creating small volumes. However, +# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for +# testing the problems that arise from 4K and 8K blocks on ashift=12 pools. +bps=$(lsblk -nrdo min-io /dev/${alldisks[0]}) +case "$bps" in +512) + allshifts=(9 10 17) + ;; +4096) + allshifts=(12 13 17) + ;; +*) + log_fail "bytes/sector: $bps != (512|4096)" + ;; +esac +log_note "Testing in ashift=${allshifts[0]} mode" + +typeset -A sizes= + +# +# Determine the refreservation for a $volsize MiB volume on each raidz type at +# various block sizes. 
+# +for parity in 1 2 3; do + raid=raidz$parity + typeset -A sizes["$raid"] + + # Ensure we hit scenarios with and without skip blocks + for ndisks in $((parity * 2)) $((parity * 2 + 1)); do + typeset -a disks=(${alldisks[0..$((ndisks - 1))]}) + + if (( ${#disks[@]} < ndisks )); then + log_note "Too few disks to test $raid-$ndisks" + continue + fi + + typeset -A sizes["$raid"]["$ndisks"] + + log_must zpool create "$TESTPOOL" "$raid" "${disks[@]}" + + for bits in "${allshifts[@]}"; do + vbs=$((1 << bits)) + log_note "Gathering refreservation for $raid-$ndisks" \ + "volblocksize=$vbs" + + vol=$TESTPOOL/$TESTVOL + log_must zfs create -V ${volsize}m \ + -o volblocksize=$vbs "$vol" + + refres=$(zfs get -Hpo value refreservation "$vol") + log_must test -n "$refres" + sizes["$raid"]["$ndisks"]["$vbs"]=$refres + + log_must_busy zfs destroy "$vol" + done + + log_must_busy zpool destroy "$TESTPOOL" + done +done + +# A little extra info is always helpful when diagnosing problems. To +# pretty-print what you find in the log, do this in ksh: +# typeset -A sizes=(...) +# print -v sizes +log_note "sizes=$(print -C sizes)" + +# +# Helper function for checking that refreservation is calculated properly in +# multi-vdev pools. "Properly" is defined as assuming that all vdevs are as +# space inefficient as the worst one. 
+# +function check_vdevs { + typeset raid=$1 + typeset nd1=$2 + typeset nd2=$3 + typeset -a disks1 disks2 + typeset vbs vol refres refres1 refres2 expect + + disks1=(${alldisks[0..$((nd1 - 1))]}) + disks2=(${alldisks[$nd1..$((nd1 + nd2 - 1))]}) + if (( ${#disks2[@]} < nd2 )); then + log_note "Too few disks to test $raid-$nd1 + $raid=$nd2" + return + fi + + log_must zpool create -f "$TESTPOOL" \ + "$raid" "${disks1[@]}" "$raid" "${disks2[@]}" + + for bits in "${allshifts[@]}"; do + vbs=$((1 << bits)) + log_note "Verifying $raid-$nd1 $raid-$nd2 volblocksize=$vbs" + + vol=$TESTPOOL/$TESTVOL + log_must zfs create -V ${volsize}m -o volblocksize=$vbs "$vol" + refres=$(zfs get -Hpo value refreservation "$vol") + log_must test -n "$refres" + + refres1=${sizes["$raid"]["$nd1"]["$vbs"]} + refres2=${sizes["$raid"]["$nd2"]["$vbs"]} + + if (( refres1 > refres2 )); then + log_note "Expecting refres ($refres) to match refres" \ + "from $raid-$nd1 ($refres1)" + log_must test "$refres" -eq "$refres1" + else + log_note "Expecting refres ($refres) to match refres" \ + "from $raid-$nd1 ($refres2)" + log_must test "$refres" -eq "$refres2" + fi + + log_must zfs destroy "$vol" + done + + log_must zpool destroy "$TESTPOOL" +} + +# +# Verify that multi-vdev pools use the last optimistic size for all the +# permutations within a particular raidz variant. +# +for raid in "${!sizes[@]}"; do + # ksh likes to create a [0] item for us. Thanks, ksh! + [[ $raid == "0" ]] && continue + + for nd1 in "${!sizes["$raid"][@]}"; do + # And with an empty array we get one key, ''. Thanks, ksh! 
+ [[ $nd1 == "0" || -z "$nd1" ]] && continue + + for nd2 in "${!sizes["$raid"][@]}"; do + [[ $nd2 == "0" || -z "$nd2" ]] && continue + + check_vdevs "$raid" "$nd1" "$nd2" + done + done +done + +log_pass "raidz refreservation=auto picks worst raidz vdev" diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh new file mode 100755 index 00000000000..7b1f84afe25 --- /dev/null +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh @@ -0,0 +1,130 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/refreserv/refreserv.cfg + +# +# DESCRIPTION: +# raidz refreservation=auto accounts for extra parity and skip blocks +# +# STRATEGY: +# 1. Create a pool with a single raidz vdev +# 2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k] +# - create a volume +# - fully overwrite it +# - verify that referenced is less than or equal to reservation +# - destroy the volume +# 3. Destroy the pool +# 4. Recreate the pool with one more disk in the vdev, then repeat steps +# 2 and 3. +# 5. Repeat all steps above for raidz2 and raidz3. +# +# NOTES: +# 1. This test will use up to 14 disks but can cover the key concepts with +# 5 disks. +# 2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely. +# + +verify_runnable "global" + +typeset -a alldisks=($DISKS) + +# The larger the volsize, the better zvol_volsize_to_reservation() is at +# guessing the right number. 
At 10M on ashift=12, the estimate may be over 26% +# too high. +volsize=100 + +function cleanup +{ + default_cleanup_noexit + default_setup_noexit "${alldisks[0]}" +} + +log_assert "raidz refreservation=auto accounts for extra parity and skip blocks" +log_onexit cleanup + +poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + +# Testing tiny block sizes on ashift=12 pools causes so much size inflation +# that small test disks may fill before creating small volumes. However, +# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for +# testing the problems that arise from 4K and 8K blocks on ashift=12 pools. +bps=$(lsblk -nrdo min-io /dev/${alldisks[0]}) +log_must test "$bps" -eq 512 -o "$bps" -eq 4096 +case "$bps" in +512) + allshifts=(9 10 17) + maxpct=151 + ;; +4096) + allshifts=(12 13 17) + maxpct=110 + ;; +*) + log_fail "bytes/sector: $bps != (512|4096)" + ;; +esac +log_note "Testing in ashift=${allshifts[0]} mode" + +# This loop handles all iterations of steps 1 through 4 described in strategy +# comment above, +for parity in 1 2 3; do + raid=raidz$parity + + # Ensure we hit scenarios with and without skip blocks + for ndisks in $((parity * 2)) $((parity * 2 + 1)); do + typeset -a disks=(${alldisks[0..$((ndisks - 1))]}) + + if (( ${#disks[@]} < ndisks )); then + log_note "Too few disks to test $raid-$ndisks" + continue + fi + + log_must zpool create "$TESTPOOL" "$raid" "${disks[@]}" + + for bits in "${allshifts[@]}"; do + vbs=$((1 << bits)) + log_note "Testing $raid-$ndisks volblocksize=$vbs" + + vol=$TESTPOOL/$TESTVOL + log_must zfs create -V ${volsize}m \ + -o volblocksize=$vbs "$vol" + block_device_wait "/dev/zvol/$vol" + log_must dd if=/dev/zero of=/dev/zvol/$vol \ + bs=1024k count=$volsize + sync + + ref=$(zfs get -Hpo value referenced "$vol") + refres=$(zfs get -Hpo value refreservation "$vol") + log_must test -n "$ref" + log_must test -n "$refres" + + typeset -F2 deltapct=$((refres * 100.0 / ref)) + log_note "$raid-$ndisks 
refreservation $refres" \ + "is $deltapct% of reservation $res" + + log_must test "$ref" -le "$refres" + log_must test "$deltapct" -le $maxpct + + log_must_busy zfs destroy "$vol" + done + + log_must_busy zpool destroy "$TESTPOOL" + done +done + +log_pass "raidz refreservation=auto accounts for extra parity and skip blocks" From 66c8b2f65a0bd684fb4d57c5072433d5fde1bf49 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 5 Jul 2019 16:45:20 -0700 Subject: [PATCH 156/325] Don't activate metaslabs with weight 0 We return ENOSPC in metaslab_activate if the metaslab has weight 0, to avoid activating a metaslab with no space available. For sanity checking, we also assert that there is no free space in the range tree in that case. Reviewed-by: Igor Kozhukhov Reviewed by: Matt Ahrens Reviewed by: Serapheim Dimitropoulos Reviewed by: Brian Behlendorf Signed-off-by: Paul Dagnelie Closes #8968 --- module/zfs/metaslab.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index a14057f8913..5da929b4843 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -2485,6 +2485,18 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight) return (0); } + /* + * If the metaslab has literally 0 space, it will have weight 0. In + * that case, don't bother activating it. This can happen if the + * metaslab had space during find_valid_metaslab, but another thread + * loaded it and used all that space while we were waiting to grab the + * lock. + */ + if (msp->ms_weight == 0) { + ASSERT0(range_tree_space(msp->ms_allocatable)); + return (SET_ERROR(ENOSPC)); + } + if ((error = metaslab_activate_allocator(msp->ms_group, msp, allocator, activation_weight)) != 0) { return (error); @@ -3735,8 +3747,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal, * worse metaslab (we waited for that metaslab to be loaded * after all). 
* - * If the activation failed due to an I/O error we skip to - * the next metaslab. + * If the activation failed due to an I/O error or ENOSPC we + * skip to the next metaslab. */ boolean_t activated; if (activation_error == 0) { From cc8df1f11748b752bb1d7a51e359bc37e43f4e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Tue, 30 Jul 2019 19:06:09 +0200 Subject: [PATCH 157/325] install path fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * rpm: correct pkgconfig path pkconfig files get installed to $datarootdir/pkgconfig but rpm expects them to be at $datadir. This works when $datarootdir==$datadir which is the case most of the time but will fail when they differ. * install: make initramfs-tools path static Since initramfs-tools' path is nothing we can control as it is an external package it does not make any sense to install zfs additions anywhere else. Simply use /usr/share/initramfs-tools as path. Reviewed-by: Brian Behlendorf Reviewed-by: Richard Laager Signed-off-by: Michael Niewöhner Closes #9087 --- contrib/initramfs/Makefile.am | 2 +- contrib/initramfs/hooks/Makefile.am | 2 +- contrib/initramfs/scripts/Makefile.am | 2 +- contrib/initramfs/scripts/local-top/Makefile.am | 2 +- rpm/generic/zfs.spec.in | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index 9f912d94664..fefd676ce0f 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -1,4 +1,4 @@ -initrddir = $(datarootdir)/initramfs-tools +initrddir = /usr/share/initramfs-tools initrd_SCRIPTS = \ conf.d/zfs conf-hooks.d/zfs hooks/zfs scripts/zfs scripts/local-top/zfs diff --git a/contrib/initramfs/hooks/Makefile.am b/contrib/initramfs/hooks/Makefile.am index c866b4fb6cd..1735872c29b 100644 --- a/contrib/initramfs/hooks/Makefile.am +++ b/contrib/initramfs/hooks/Makefile.am @@ -1,4 +1,4 @@ -hooksdir = 
$(datarootdir)/initramfs-tools/hooks +hooksdir = /usr/share/initramfs-tools/hooks hooks_SCRIPTS = \ zfs diff --git a/contrib/initramfs/scripts/Makefile.am b/contrib/initramfs/scripts/Makefile.am index a550311cd74..12c2641b80c 100644 --- a/contrib/initramfs/scripts/Makefile.am +++ b/contrib/initramfs/scripts/Makefile.am @@ -1,4 +1,4 @@ -scriptsdir = $(datarootdir)/initramfs-tools/scripts +scriptsdir = /usr/share/initramfs-tools/scripts scripts_DATA = \ zfs diff --git a/contrib/initramfs/scripts/local-top/Makefile.am b/contrib/initramfs/scripts/local-top/Makefile.am index 88aa2d4ffa6..c820325947b 100644 --- a/contrib/initramfs/scripts/local-top/Makefile.am +++ b/contrib/initramfs/scripts/local-top/Makefile.am @@ -1,3 +1,3 @@ -localtopdir = $(datarootdir)/initramfs-tools/scripts/local-top +localtopdir = /usr/share/initramfs-tools/scripts/local-top EXTRA_DIST = zfs diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index b9ca5ed5fb7..b4a79371672 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -458,8 +458,8 @@ systemctl --system daemon-reload >/dev/null || true %{_libdir}/libzfs*.so.* %files -n libzfs2-devel -%{_datadir}/pkgconfig/libzfs.pc -%{_datadir}/pkgconfig/libzfs_core.pc +%{_datarootdir}/pkgconfig/libzfs.pc +%{_datarootdir}/pkgconfig/libzfs_core.pc %{_libdir}/*.so %{_includedir}/* %doc AUTHORS COPYRIGHT LICENSE NOTICE README.md From 1d4faef7a5e4864af3024cc521ea243c4d90cff6 Mon Sep 17 00:00:00 2001 From: Clint Armstrong Date: Tue, 30 Jul 2019 19:02:19 -0400 Subject: [PATCH 158/325] Add channel program for property based snapshots Channel programs that many users find useful should be included with zfs in the /contrib directory. This is the first of these contributions. A channel program to recursively take snapshots of datasets with the property com.sun:auto-snapshot=true. 
Reviewed-by: Kash Pande Reviewed-by: Brian Behlendorf Signed-off-by: Clint Armstrong Closes #8443 Closes #9050 --- configure.ac | 1 + contrib/Makefile.am | 4 +-- contrib/zcp/Makefile.am | 1 + contrib/zcp/autosnap.lua | 75 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 contrib/zcp/Makefile.am create mode 100644 contrib/zcp/autosnap.lua diff --git a/configure.ac b/configure.ac index a3ac134ffcc..46a27f7f194 100644 --- a/configure.ac +++ b/configure.ac @@ -135,6 +135,7 @@ AC_CONFIG_FILES([ contrib/initramfs/scripts/local-top/Makefile contrib/pyzfs/Makefile contrib/pyzfs/setup.py + contrib/zcp/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 81926a83ee6..9a82f82ee38 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = bash_completion.d dracut initramfs pyzfs -DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs +SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp +DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp diff --git a/contrib/zcp/Makefile.am b/contrib/zcp/Makefile.am new file mode 100644 index 00000000000..54d65f891e3 --- /dev/null +++ b/contrib/zcp/Makefile.am @@ -0,0 +1 @@ ++EXTRA_DIST = autosnap.lua diff --git a/contrib/zcp/autosnap.lua b/contrib/zcp/autosnap.lua new file mode 100644 index 00000000000..d9ae32ce458 --- /dev/null +++ b/contrib/zcp/autosnap.lua @@ -0,0 +1,75 @@ +-- Recursively snapshot every dataset with a given property +-- +-- Usage: zfs program autosnap.lua -- [-n] [-p ] + +results = {} + +args = ... +argv = args["argv"] +usage = [[ + + +usage: zfs program autosnap.lua -- [-n] [-p ] + + -n: performs checks only, does not take snapshots + -p : property to check. 
[default: com.sun:auto-snapshot] + : root snapshot to create [example: tank/data@backup] +]] + +property = "com.sun:auto-snapshot" +noop = false +root_snap = nil + +for i, arg in ipairs(argv) do + if arg == "-n" then + noop = true + elseif arg == "-p" then + elseif argv[i-1] == "-p" then + property = arg + else + root_snap = arg + end +end + +if root_snap == nil or property == nil then + error(usage) +end + +root_ds_name = "" +snap_name = "" +for i = 1, #root_snap do + if root_snap:sub(i, i) == "@" then + root_ds_name = root_snap:sub(1, i-1) + snap_name = root_snap:sub(i+1, root_snap:len()) + end +end + +function auto_snap(root) + auto, source = zfs.get_prop(root, property) + if auto == "true" then + ds_snap_name = root .. "@" .. snap_name + err = 0 + if noop then + err = zfs.check.snapshot(ds_snap_name) + else + err = zfs.sync.snapshot(ds_snap_name) + end + results[ds_snap_name] = err + end + for child in zfs.list.children(root) do + auto_snap(child) + end +end + +auto_snap(root_ds_name) +err_txt = "" +for ds, err in pairs(results) do + if err ~= 0 then + err_txt = err_txt .. "failed to create " .. ds .. ": " .. err .. "\n" + end +end +if err_txt ~= "" then + error(err_txt) +end + +return results From 77d59a6d63bffe8a26b5597b0235255da0e5851b Mon Sep 17 00:00:00 2001 From: jdike <52420226+jdike@users.noreply.github.com> Date: Wed, 31 Jul 2019 17:53:39 -0400 Subject: [PATCH 159/325] lockdep false positive - move txg_kick() outside of ->dp_lock This fixes a lockdep warning by breaking a link between ->tx_sync_lock and ->dp_lock. 
The deadlock envisioned by lockdep is this: thread 1 holds db->db_mtx and tries to get dp->dp_lock: dsl_pool_dirty_space+0x70/0x2d0 [zfs] dbuf_dirty+0x778/0x31d0 [zfs] thread 2 holds bpo->bpo_lock and tries to get db->db_mtx: dmu_buf_will_dirty_impl dmu_buf_will_dirty+0x6b/0x6c0 [zfs] bpobj_iterate_impl+0xbe6/0x1410 [zfs] thread 3 holds tx->tx_sync_lock and tries to get bpo->bpo_lock: bpobj_space+0x63/0x470 [zfs] dsl_scan_active+0x340/0x3d0 [zfs] txg_sync_thread+0x3f2/0x1370 [zfs] thread 4 holds dp->dp_lock and tries to get tx->tx_sync_lock txg_kick+0x61/0x420 [zfs] dsl_pool_need_dirty_delay+0x1c7/0x3f0 [zfs] This patch is orginally from Brian Behlendorf and slightly simplified by me. It breaks this cycle in thread 4 by moving the call from dsl_pool_need_dirty_delay to txg_kick outside the section controlled by dp->dp_lock. Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Signed-off-by: Jeff Dike Closes #9094 --- module/zfs/dsl_pool.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 10e967ab91e..ead755007dd 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -889,14 +889,14 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp) zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; uint64_t dirty_min_bytes = zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100; - boolean_t rv; + uint64_t dirty; mutex_enter(&dp->dp_lock); - if (dp->dp_dirty_total > dirty_min_bytes) - txg_kick(dp); - rv = (dp->dp_dirty_total > delay_min_bytes); + dirty = dp->dp_dirty_total; mutex_exit(&dp->dp_lock); - return (rv); + if (dirty > dirty_min_bytes) + txg_kick(dp); + return (dirty > delay_min_bytes); } void From 66398a4da318b21c680b1f951e696b6717dc430d Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Mon, 5 Aug 2019 10:50:20 -0700 Subject: [PATCH 160/325] Test cancelling a removal in ZTS This patch adds a new test that sanity checks cancelling a removal. 
Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Serapheim Dimitropoulos Closes #9101 Conflicts: tests/zfs-tests/tests/functional/removal/Makefile.am --- tests/runfiles/linux.run | 2 +- .../tests/functional/removal/Makefile.am | 7 +- .../functional/removal/removal_cancel.ksh | 99 +++++++++++++++++++ 3 files changed, 104 insertions(+), 4 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/removal/removal_cancel.ksh diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 107b19c32c2..d02e15e263d 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -761,7 +761,7 @@ tags = ['functional', 'refreserv'] [tests/functional/removal] pre = -tests = ['removal_all_vdev', 'removal_check_space', +tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space', 'removal_condense_export', 'removal_multiple_indirection', 'removal_remap', 'removal_nopwrite', 'removal_remap_deadlists', 'removal_resume_export', 'removal_sanity', 'removal_with_add', diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am index df92e0b5ed4..2bd015e8a6e 100644 --- a/tests/zfs-tests/tests/functional/removal/Makefile.am +++ b/tests/zfs-tests/tests/functional/removal/Makefile.am @@ -10,14 +10,15 @@ # # -# Copyright (c) 2014, 2015 by Delphix. All rights reserved. +# Copyright (c) 2014, 2019 by Delphix. All rights reserved. 
# pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal dist_pkgdata_SCRIPTS = \ - cleanup.ksh removal_all_vdev.ksh removal_check_space.ksh \ - removal_condense_export.ksh removal_multiple_indirection.ksh \ + cleanup.ksh removal_all_vdev.ksh removal_cancel.ksh \ + removal_check_space.ksh removal_condense_export.ksh \ + removal_multiple_indirection.ksh \ removal_remap_deadlists.ksh removal_nopwrite.ksh removal_remap.ksh \ removal_reservation.ksh removal_resume_export.ksh \ removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \ diff --git a/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh b/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh new file mode 100755 index 00000000000..e7fa6abb8bc --- /dev/null +++ b/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh @@ -0,0 +1,99 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib + +# +# DESCRIPTION: +# +# Ensure that cancelling a removal midway does not cause any +# issues like cause a panic. +# +# STRATEGY: +# +# 1. Create a pool with one vdev and do some writes on it. +# 2. Add a new vdev to the pool and start the removal of +# the first vdev. +# 3. Cancel the removal after some segments have been copied +# over to the new vdev. +# 4. Run zdb to ensure the on-disk state of the pool is ok. +# + +function cleanup +{ + # + # Reset tunable. 
+ # + log_must set_tunable32 zfs_removal_suspend_progress 0 +} +log_onexit cleanup + +SAMPLEFILE=/$TESTDIR/00 + +# +# Create pool with one disk. +# +log_must default_setup_noexit "$REMOVEDISK" + +# +# Create a file of size 1GB and then do some random writes. +# Since randwritecomp does 8K writes we do 12500 writes +# which means we write ~100MB to the vdev. +# +log_must mkfile -n 1g $SAMPLEFILE +log_must randwritecomp $SAMPLEFILE 12500 + +# +# Add second device where all the data will be evacuated. +# +log_must zpool add -f $TESTPOOL $NOTREMOVEDISK + +# +# Start removal. +# +log_must zpool remove $TESTPOOL $REMOVEDISK + +# +# Sleep a bit and hopefully allow removal to copy some data. +# +log_must sleep 1 + +# +# Block removal. +# +log_must set_tunable32 zfs_removal_suspend_progress 1 + +# +# Only for debugging purposes in test logs. +# +log_must zpool status $TESTPOOL + +# +# Cancel removal. +# +log_must zpool remove -s $TESTPOOL + +# +# Verify on-disk state. +# +log_must zdb $TESTPOOL + +log_pass "Device removal thread cancelled successfully." From 376ca4649b4e7ce76aea64c8f3daf9445b118f87 Mon Sep 17 00:00:00 2001 From: DeHackEd Date: Mon, 5 Aug 2019 14:35:47 -0400 Subject: [PATCH 161/325] Don't wakeup unnecessarily in 'zpool events -f' ZED can prevent CPU's from properly sleeping. Rather than periodically waking up in the zevents code, just go to sleep and wait for a wakeup. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: DHE Closes #9091 --- module/zfs/fm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/module/zfs/fm.c b/module/zfs/fm.c index cc5225dcbbe..0a0fc79bd37 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -683,8 +683,7 @@ zfs_zevent_wait(zfs_zevent_t *ze) break; } - error = cv_timedwait_sig(&zevent_cv, &zevent_lock, - ddi_get_lbolt() + MSEC_TO_TICK(10)); + error = cv_wait_sig(&zevent_cv, &zevent_lock); if (signal_pending(current)) { error = SET_ERROR(EINTR); break; From 0b96952eef765d78e45bd355392b0300b21f9100 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Tue, 13 Aug 2019 09:46:12 -0400 Subject: [PATCH 162/325] Drop KMC_NOEMERGENCY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is not implemented. If it were implemented, using it would risk deadlocks on pre-3.18 kernels. Lets just drop it. Reviewed-by: Brian Behlendorf Reviewed-by: Michael Niewöhner Signed-off-by: Richard Yao Closes #9119 --- include/spl/sys/kmem_cache.h | 2 -- module/spl/spl-zlib.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/spl/sys/kmem_cache.h b/include/spl/sys/kmem_cache.h index 8fa14f67e73..bb413207def 100644 --- a/include/spl/sys/kmem_cache.h +++ b/include/spl/sys/kmem_cache.h @@ -45,7 +45,6 @@ enum { KMC_BIT_VMEM = 6, /* Use vmem cache */ KMC_BIT_SLAB = 7, /* Use Linux slab cache */ KMC_BIT_OFFSLAB = 8, /* Objects not on slab */ - KMC_BIT_NOEMERGENCY = 9, /* Disable emergency objects */ KMC_BIT_DEADLOCKED = 14, /* Deadlock detected */ KMC_BIT_GROWING = 15, /* Growing in progress */ KMC_BIT_REAPING = 16, /* Reaping in progress */ @@ -73,7 +72,6 @@ typedef enum kmem_cbrc { #define KMC_VMEM (1 << KMC_BIT_VMEM) #define KMC_SLAB (1 << KMC_BIT_SLAB) #define KMC_OFFSLAB (1 << KMC_BIT_OFFSLAB) -#define KMC_NOEMERGENCY (1 << KMC_BIT_NOEMERGENCY) #define KMC_DEADLOCKED (1 << KMC_BIT_DEADLOCKED) #define KMC_GROWING (1 << 
KMC_BIT_GROWING) #define KMC_REAPING (1 << KMC_BIT_REAPING) diff --git a/module/spl/spl-zlib.c b/module/spl/spl-zlib.c index 229e6a44b0b..62423343c1b 100644 --- a/module/spl/spl-zlib.c +++ b/module/spl/spl-zlib.c @@ -202,7 +202,7 @@ spl_zlib_init(void) zlib_workspace_cache = kmem_cache_create( "spl_zlib_workspace_cache", size, 0, NULL, NULL, NULL, NULL, NULL, - KMC_VMEM | KMC_NOEMERGENCY); + KMC_VMEM); if (!zlib_workspace_cache) return (1); From d38e4ee1428ecbf600817a7e4a0eea0e228fb1fc Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Tue, 13 Aug 2019 22:58:02 +0900 Subject: [PATCH 163/325] Change boolean-like uint8_t fields in znode_t to boolean_t Given znode_t is an in-core structure, it's more readable to have them as boolean. Also co-locate existing boolean fields with them for space efficiency (expecting 8 booleans to be packed/aligned). Reviewed-by: Brian Behlendorf Signed-off-by: Tomohiro Kusumi Closes #9092 Conflicts: include/sys/zfs_znode.h module/zfs/zfs_znode.c --- include/sys/zfs_znode.h | 16 ++++++++-------- module/zfs/zfs_ctldir.c | 16 ++++++++-------- module/zfs/zfs_vnops.c | 20 ++++++++++---------- module/zfs/zfs_znode.c | 23 +++++++++++------------ 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 01b358cc4da..ced5a73867a 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -192,10 +192,14 @@ typedef struct znode { krwlock_t z_name_lock; /* "master" lock for dirent locks */ zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ rangelock_t z_rangelock; /* file range locks */ - uint8_t z_unlinked; /* file has been unlinked */ - uint8_t z_atime_dirty; /* atime needs to be synced */ - uint8_t z_zn_prefetch; /* Prefetch znodes? */ - uint8_t z_moved; /* Has this znode been moved? */ + boolean_t z_unlinked; /* file has been unlinked */ + boolean_t z_atime_dirty; /* atime needs to be synced */ + boolean_t z_zn_prefetch; /* Prefetch znodes? 
*/ + boolean_t z_moved; /* Has this znode been moved? */ + boolean_t z_is_sa; /* are we native sa? */ + boolean_t z_is_mapped; /* are we mmap'ed */ + boolean_t z_is_ctldir; /* are we .zfs entry */ + boolean_t z_is_stale; /* are we stale due to rollback? */ boolean_t z_suspended; /* extra ref from a suspend? */ uint_t z_blksz; /* block size in bytes */ uint_t z_seq; /* modification sequence number */ @@ -213,10 +217,6 @@ typedef struct znode { uint64_t z_projid; /* project ID */ list_node_t z_link_node; /* all znodes in fs link */ sa_handle_t *z_sa_hdl; /* handle to sa data */ - boolean_t z_is_sa; /* are we native sa? */ - boolean_t z_is_mapped; /* are we mmap'ed */ - boolean_t z_is_ctldir; /* are we .zfs entry */ - boolean_t z_is_stale; /* are we stale due to rollback? */ struct inode z_inode; /* generic vfs inode */ } znode_t; diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 8acbbb61ca9..b3cbc7d7e5f 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -463,10 +463,14 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_id = id; - zp->z_unlinked = 0; - zp->z_atime_dirty = 0; - zp->z_zn_prefetch = 0; - zp->z_moved = 0; + zp->z_unlinked = B_FALSE; + zp->z_atime_dirty = B_FALSE; + zp->z_zn_prefetch = B_FALSE; + zp->z_moved = B_FALSE; + zp->z_is_sa = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_TRUE; + zp->z_is_stale = B_FALSE; zp->z_sa_hdl = NULL; zp->z_blksz = 0; zp->z_seq = 0; @@ -475,10 +479,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_pflags = 0; zp->z_mode = 0; zp->z_sync_cnt = 0; - zp->z_is_mapped = B_FALSE; - zp->z_is_ctldir = B_TRUE; - zp->z_is_sa = B_FALSE; - zp->z_is_stale = B_FALSE; ip->i_generation = 0; ip->i_ino = id; ip->i_mode = (S_IFDIR | S_IRWXUGO); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 3c227816428..1ad6f1588cc 100644 --- a/module/zfs/zfs_vnops.c +++ 
b/module/zfs/zfs_vnops.c @@ -1655,7 +1655,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, zfs_fuid_sync(zfsvfs, tx); /* Add to unlinked set */ - zp->z_unlinked = 1; + zp->z_unlinked = B_TRUE; zfs_unlinked_add(zp, tx); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); @@ -1854,7 +1854,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) if (xattr_obj_unlinked) { ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2); mutex_enter(&xzp->z_lock); - xzp->z_unlinked = 1; + xzp->z_unlinked = B_TRUE; clear_nlink(ZTOI(xzp)); links = 0; error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), @@ -3407,7 +3407,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) } if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; ZFS_TIME_ENCODE(&ip->i_atime, atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, sizeof (atime)); @@ -4371,7 +4371,7 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, } /* unmark z_unlinked so zfs_link_create will not reject */ if (is_tmpfile) - szp->z_unlinked = 0; + szp->z_unlinked = B_FALSE; error = zfs_link_create(dl, szp, tx, 0); if (error == 0) { @@ -4393,7 +4393,7 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, } } else if (is_tmpfile) { /* restore z_unlinked since when linking failed */ - szp->z_unlinked = 1; + szp->z_unlinked = B_TRUE; } txg = dmu_tx_get_txg(tx); dmu_tx_commit(tx); @@ -4591,7 +4591,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) /* Preserve the mtime and ctime provided by the inode */ ZFS_TIME_ENCODE(&ip->i_mtime, mtime); ZFS_TIME_ENCODE(&ip->i_ctime, ctime); - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; zp->z_seq++; err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); @@ -4645,7 +4645,7 @@ zfs_dirty_inode(struct inode *ip, int flags) * only need to dirty atime. 
*/ if (flags == I_DIRTY_TIME) { - zp->z_atime_dirty = 1; + zp->z_atime_dirty = B_TRUE; goto out; } #endif @@ -4662,7 +4662,7 @@ zfs_dirty_inode(struct inode *ip, int flags) } mutex_enter(&zp->z_lock); - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); @@ -4707,7 +4707,7 @@ zfs_inactive(struct inode *ip) return; } - if (zp->z_atime_dirty && zp->z_unlinked == 0) { + if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) { dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); @@ -4720,7 +4720,7 @@ zfs_inactive(struct inode *ip) mutex_enter(&zp->z_lock); (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), (void *)&atime, sizeof (atime), tx); - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; mutex_exit(&zp->z_lock); dmu_tx_commit(tx); } diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 91162e857d4..498547758b1 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -135,7 +135,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) zp->z_acl_cached = NULL; zp->z_xattr_cached = NULL; zp->z_xattr_parent = 0; - zp->z_moved = 0; + zp->z_moved = B_FALSE; return (0); } @@ -539,19 +539,18 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT(zp->z_dirlocks == NULL); ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); - zp->z_moved = 0; - zp->z_suspended = B_FALSE; + zp->z_unlinked = B_FALSE; + zp->z_atime_dirty = B_FALSE; + zp->z_moved = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_FALSE; + zp->z_is_stale = B_FALSE; zp->z_sa_hdl = NULL; - zp->z_unlinked = 0; - zp->z_atime_dirty = 0; zp->z_mapcnt = 0; zp->z_id = db->db_object; zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - zp->z_is_mapped = B_FALSE; - zp->z_is_ctldir = B_FALSE; - zp->z_is_stale = B_FALSE; zfs_znode_sa_init(zfsvfs, zp, db, 
obj_type, hdl); @@ -1264,7 +1263,7 @@ zfs_rezget(znode_t *zp) zfs_set_inode_flags(zp, ZTOI(zp)); zp->z_blksz = doi.doi_data_block_size; - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; zfs_inode_update(zp); /* @@ -1884,9 +1883,9 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) vattr.va_gid = crgetgid(cr); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); - rootzp->z_moved = 0; - rootzp->z_unlinked = 0; - rootzp->z_atime_dirty = 0; + rootzp->z_unlinked = B_FALSE; + rootzp->z_atime_dirty = B_FALSE; + rootzp->z_moved = B_FALSE; rootzp->z_is_sa = USE_SA(version, os); rootzp->z_pflags = 0; From 628fd31d26fa6e85b50578d34e3ca31dad956e23 Mon Sep 17 00:00:00 2001 From: George Wilson Date: Tue, 13 Aug 2019 08:11:57 -0600 Subject: [PATCH 164/325] spa_load_verify() may consume too much memory When a pool is imported it will scan the pool to verify the integrity of the data and metadata. The amount it scans will depend on the import flags provided. On systems with small amounts of memory or when importing a pool from the crash kernel, it's possible for spa_load_verify to issue too many I/Os that it consumes all the memory of the system resulting in an OOM message or a hang. To prevent this, we limit the amount of memory that the initial pool scan can consume. This change will, by default, use 1/16th of the ARC for scan I/Os to prevent running the system out of memory during import. 
Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: Serapheim Dimitropoulos Signed-off-by: George Wilson george.wilson@delphix.com External-issue: DLPX-65237 External-issue: DLPX-65238 Closes #9146 --- cmd/zdb/zdb.c | 15 ++++++++------- include/sys/spa_impl.h | 4 +++- man/man5/zfs-module-parameters.5 | 8 ++++---- module/zfs/spa.c | 24 +++++++++++++++--------- 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 4b07cdb8e0c..3e0e0575373 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -109,7 +109,7 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); uint64_t *zopt_object = NULL; static unsigned zopt_objects = 0; -uint64_t max_inflight = 1000; +uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */ static int leaked_objects = 0; static range_tree_t *mos_refd_objs; @@ -3449,7 +3449,7 @@ zdb_blkptr_done(zio_t *zio) abd_free(zio->io_abd); mutex_enter(&spa->spa_scrub_lock); - spa->spa_load_verify_ios--; + spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { @@ -3520,9 +3520,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, flags |= ZIO_FLAG_SPECULATIVE; mutex_enter(&spa->spa_scrub_lock); - while (spa->spa_load_verify_ios > max_inflight) + while (spa->spa_load_verify_bytes > max_inflight_bytes) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); - spa->spa_load_verify_ios++; + spa->spa_load_verify_bytes += size; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(NULL, spa, bp, abd, size, @@ -4285,6 +4285,7 @@ dump_block_stats(spa_t *spa) ZIO_FLAG_GODFATHER); } } + ASSERT0(spa->spa_load_verify_bytes); /* * Done after zio_wait() since zcb_haderrors is modified in @@ -5933,10 +5934,10 @@ main(int argc, char **argv) break; /* NB: Sort single match options below. 
*/ case 'I': - max_inflight = strtoull(optarg, NULL, 0); - if (max_inflight == 0) { + max_inflight_bytes = strtoull(optarg, NULL, 0); + if (max_inflight_bytes == 0) { (void) fprintf(stderr, "maximum number " - "of inflight I/Os must be greater " + "of inflight bytes must be greater " "than 0\n"); usage(); } diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 9ab107599fd..659c69738fa 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -270,7 +270,9 @@ struct spa { boolean_t spa_extreme_rewind; /* rewind past deferred frees */ kmutex_t spa_scrub_lock; /* resilver/scrub lock */ uint64_t spa_scrub_inflight; /* in-flight scrub bytes */ - uint64_t spa_load_verify_ios; /* in-flight verification IOs */ + + /* in-flight verification bytes */ + uint64_t spa_load_verify_bytes; kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */ uint8_t spa_scrub_active; /* active or suspended? */ uint8_t spa_scrub_type; /* type of scrub we're doing */ diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 77b4c2801e0..3395175d6eb 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -505,13 +505,13 @@ Default value: \fB1\fR. .sp .ne 2 .na -\fBspa_load_verify_maxinflight\fR (int) +\fBspa_load_verify_shift\fR (int) .ad .RS 12n -Maximum concurrent I/Os during the traversal performed during an "extreme -rewind" (\fB-X\fR) pool import. +Sets the maximum number of bytes to consume during pool import to the log2 +fraction of the target arc size. .sp -Default value: \fB10000\fR. +Default value: \fB4\fR. 
.RE .sp diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 4e322e34b08..a9efe254b6b 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -90,6 +90,7 @@ #include #include #include +#include #endif /* _KERNEL */ #include "zfs_prop.h" @@ -2106,16 +2107,16 @@ spa_load_verify_done(zio_t *zio) } mutex_enter(&spa->spa_scrub_lock); - spa->spa_load_verify_ios--; + spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } /* - * Maximum number of concurrent scrub i/os to create while verifying - * a pool while importing it. + * Maximum number of inflight bytes is the log2 faction of the arc size. + * By default, we set it to 1/16th of the arc. */ -int spa_load_verify_maxinflight = 10000; +int spa_load_verify_shift = 4; int spa_load_verify_metadata = B_TRUE; int spa_load_verify_data = B_TRUE; @@ -2136,13 +2137,14 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (!BP_IS_METADATA(bp) && !spa_load_verify_data) return (0); + int maxinflight_bytes = arc_target_bytes() >> spa_load_verify_shift; zio_t *rio = arg; size_t size = BP_GET_PSIZE(bp); mutex_enter(&spa->spa_scrub_lock); - while (spa->spa_load_verify_ios >= spa_load_verify_maxinflight) + while (spa->spa_load_verify_bytes >= maxinflight_bytes) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); - spa->spa_load_verify_ios++; + spa->spa_load_verify_bytes += size; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, @@ -2195,12 +2197,14 @@ spa_load_verify(spa_t *spa) "spa_load_verify_metadata=%u)", spa_load_verify_data, spa_load_verify_metadata); } + error = traverse_pool(spa, spa->spa_verify_min_txg, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); } (void) zio_wait(rio); + ASSERT0(spa->spa_load_verify_bytes); spa->spa_load_meta_errors = sle.sle_meta_count; spa->spa_load_data_errors = sle.sle_data_count; @@ -8769,9 +8773,11 @@ 
EXPORT_SYMBOL(spa_event_notify); #endif #if defined(_KERNEL) -module_param(spa_load_verify_maxinflight, int, 0644); -MODULE_PARM_DESC(spa_load_verify_maxinflight, - "Max concurrent traversal I/Os while verifying pool during import -X"); +/* BEGIN CSTYLED */ +module_param(spa_load_verify_shift, int, 0644); +MODULE_PARM_DESC(spa_load_verify_shift, "log2(fraction of arc that can " + "be used by inflight I/Os when verifying pool during import"); +/* END CSTYLED */ module_param(spa_load_verify_metadata, int, 0644); MODULE_PARM_DESC(spa_load_verify_metadata, From 5549a537dda49b1cf280f7fdee5ac7f0cc217438 Mon Sep 17 00:00:00 2001 From: Prakash Surya Date: Tue, 13 Aug 2019 20:18:53 -0700 Subject: [PATCH 165/325] Fix device expansion when VM is powered off When running on an ESXi based VM, I've found that "zpool online -e" will not expand the zpool, if the disk was expanded in ESXi while the VM was powered off. For example, take the following scenario: 1. VM running on top of VMware ESXi 2. ZFS pool created with a given device "sda" of size 8GB 3. VM powered off 4. Device "sda" size expanded to 16GB 5. VM powered on 6. "zpool online -e" used on device "sda" In this situation, after (2) the zpool will be roughly 8GB in size. After (6), the expectation is the zpool's size will expand to roughly 16GB in size; i.e. expand to the new size of the "sda" device. Unfortunately, I've seen that after (6), the zpool size does not change. What's happening is after (5), the EFI label of the "sda" device will be such that fields "efi_last_u_lba", "efi_last_lba", and "efi_altern_lba" all reflect the new size of the disk; i.e. "33554398", "33554431", and "33554431" respectively. Thus, the check that we perform in "efi_use_whole_disk": if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba >= efi_label->efi_last_lba)) { This will return true, and then we return from the function without having expanded the size of the zpool/device. 
In contrast, if we remove steps (3) and (5) in the sequence above, i.e. the device is expanded while the VM is powered on, things change. In that case, the fields "efi_last_u_lba" and "efi_altern_lba" do not change (i.e. they still reflect the old 8GB device size), but the "efi_last_lba" field does change (i.e. it now reflects the new 16GB device size). Thus, when we evaluate the same conditional in "efi_use_whole_disk", it'll return false, so the zpool is expanded. Taking all of this into account, this PR updates "efi_use_whole_disk" to properly expand the zpool when the underlying disk is expanded while the VM is powered off. Reviewed-by: Brian Behlendorf Reviewed-by: George Wilson Reviewed-by: Don Brady Signed-off-by: Prakash Surya Closes #9111 --- lib/libefi/rdwr_efi.c | 112 ++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 25 deletions(-) diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 1d8f631c837..93c79277dae 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -42,6 +42,7 @@ #include #include #include +#include #include static struct uuid_to_ptag { @@ -1113,7 +1114,9 @@ efi_use_whole_disk(int fd) int i; uint_t resv_index = 0, data_index = 0; diskaddr_t resv_start = 0, data_start = 0; - diskaddr_t difference; + diskaddr_t data_size, limit, difference; + boolean_t sync_needed = B_FALSE; + uint_t nblocks; rval = efi_alloc_and_read(fd, &efi_label); if (rval < 0) { @@ -1122,13 +1125,67 @@ efi_use_whole_disk(int fd) return (rval); } + /* + * Find the last physically non-zero partition. + * This should be the reserved partition. + */ + for (i = 0; i < efi_label->efi_nparts; i ++) { + if (resv_start < efi_label->efi_parts[i].p_start) { + resv_start = efi_label->efi_parts[i].p_start; + resv_index = i; + } + } + + /* + * Find the last physically non-zero partition before that. + * This is the data partition. 
+ */ + for (i = 0; i < resv_index; i ++) { + if (data_start < efi_label->efi_parts[i].p_start) { + data_start = efi_label->efi_parts[i].p_start; + data_index = i; + } + } + data_size = efi_label->efi_parts[data_index].p_size; + + /* + * See the "efi_alloc_and_init" function for more information + * about where this "nblocks" value comes from. + */ + nblocks = efi_label->efi_first_u_lba - 1; + + /* + * Determine if the EFI label is out of sync. We check that: + * + * 1. the data partition ends at the limit we set, and + * 2. the reserved partition starts at the limit we set. + * + * If either of these conditions is not met, then we need to + * resync the EFI label. + * + * The limit is the last usable LBA, determined by the last LBA + * and the first usable LBA fields on the EFI label of the disk + * (see the lines directly above). Additionally, we factor in + * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and + * P2ALIGN it to ensure the partition boundaries are aligned + * (for performance reasons). The alignment should match the + * alignment used by the "zpool_label_disk" function. + */ + limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE, + PARTITION_END_ALIGNMENT); + if (data_start + data_size != limit || resv_start != limit) + sync_needed = B_TRUE; + + if (efi_debug && sync_needed) + (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n"); + /* * If alter_lba is 1, we are using the backup label. * Since we can locate the backup label by disk capacity, * there must be no unallocated space. */ if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba - >= efi_label->efi_last_lba)) { + >= efi_label->efi_last_lba && !sync_needed)) { if (efi_debug) { (void) fprintf(stderr, "efi_use_whole_disk: requested space not found\n"); @@ -1137,19 +1194,6 @@ efi_use_whole_disk(int fd) return (VT_ENOSPC); } - difference = efi_label->efi_last_lba - efi_label->efi_altern_lba; - - /* - * Find the last physically non-zero partition. 
- * This should be the reserved partition. - */ - for (i = 0; i < efi_label->efi_nparts; i ++) { - if (resv_start < efi_label->efi_parts[i].p_start) { - resv_start = efi_label->efi_parts[i].p_start; - resv_index = i; - } - } - /* * Verify that we've found the reserved partition by checking * that it looks the way it did when we created it in zpool_label_disk. @@ -1167,17 +1211,36 @@ efi_use_whole_disk(int fd) return (VT_ENOSPC); } - /* - * Find the last physically non-zero partition before that. - * This is the data partition. - */ - for (i = 0; i < resv_index; i ++) { - if (data_start < efi_label->efi_parts[i].p_start) { - data_start = efi_label->efi_parts[i].p_start; - data_index = i; + if (data_start + data_size != resv_start) { + if (efi_debug) { + (void) fprintf(stderr, + "efi_use_whole_disk: " + "data_start (%lli) + " + "data_size (%lli) != " + "resv_start (%lli)\n", + data_start, data_size, resv_start); + } + + return (VT_EINVAL); + } + + if (limit < resv_start) { + if (efi_debug) { + (void) fprintf(stderr, + "efi_use_whole_disk: " + "limit (%lli) < resv_start (%lli)\n", + limit, resv_start); } + + return (VT_EINVAL); } + difference = limit - resv_start; + + if (efi_debug) + (void) fprintf(stderr, + "efi_use_whole_disk: difference is %lli\n", difference); + /* * Move the reserved partition. 
There is currently no data in * here except fabricated devids (which get generated via @@ -1185,7 +1248,7 @@ efi_use_whole_disk(int fd) */ efi_label->efi_parts[data_index].p_size += difference; efi_label->efi_parts[resv_index].p_start += difference; - efi_label->efi_last_u_lba += difference; + efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks; rval = efi_write(fd, efi_label); if (rval < 0) { @@ -1202,7 +1265,6 @@ efi_use_whole_disk(int fd) return (0); } - /* * write EFI label and backup label */ From 93fd9101c9cc46879d6512cbe3363723d0c5d7c1 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Tue, 13 Aug 2019 20:24:43 -0700 Subject: [PATCH 166/325] Prevent race in blkptr_verify against device removal When we check the vdev of the blkptr in zfs_blkptr_verify, we can run into a race condition where that vdev is temporarily unavailable. This happens when a device removal operation and the old vdev_t has been removed from the array, but the new indirect vdev has not yet been inserted. We hold the spa_config_lock while doing our sensitive verification. To ensure that we don't deadlock, we only grab the lock if we don't have config_writer held. In addition, I had to const the tags of the refcounts and the spa_config_lock arguments. 
Reviewed-by: Brian Behlendorf Reviewed-by: Serapheim Dimitropoulos Signed-off-by: Paul Dagnelie Closes #9112 --- include/sys/refcount.h | 19 ++++++++++--------- include/sys/spa.h | 5 ++--- module/zfs/refcount.c | 19 ++++++++++--------- module/zfs/spa_misc.c | 4 ++-- module/zfs/zio.c | 16 +++++++++++----- 5 files changed, 35 insertions(+), 28 deletions(-) diff --git a/include/sys/refcount.h b/include/sys/refcount.h index e982faeba0f..c8f58623039 100644 --- a/include/sys/refcount.h +++ b/include/sys/refcount.h @@ -44,7 +44,7 @@ extern "C" { #ifdef ZFS_DEBUG typedef struct reference { list_node_t ref_link; - void *ref_holder; + const void *ref_holder; uint64_t ref_number; uint8_t *ref_removed; } reference_t; @@ -70,16 +70,17 @@ void zfs_refcount_destroy(zfs_refcount_t *); void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t); int zfs_refcount_is_zero(zfs_refcount_t *); int64_t zfs_refcount_count(zfs_refcount_t *); -int64_t zfs_refcount_add(zfs_refcount_t *, void *); -int64_t zfs_refcount_remove(zfs_refcount_t *, void *); -int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *); -int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *); +int64_t zfs_refcount_add(zfs_refcount_t *, const void *); +int64_t zfs_refcount_remove(zfs_refcount_t *, const void *); +int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *); +int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *); void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *); -void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *); +void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *, + const void *); void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t, - void *, void *); -boolean_t zfs_refcount_held(zfs_refcount_t *, void *); -boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *); + const void *, const void *); +boolean_t zfs_refcount_held(zfs_refcount_t *, const void *); +boolean_t 
zfs_refcount_not_held(zfs_refcount_t *, const void *); void zfs_refcount_init(void); void zfs_refcount_fini(void); diff --git a/include/sys/spa.h b/include/sys/spa.h index d43801de5dd..42bf9dcc104 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -976,8 +976,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid, /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); -extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw); -extern void spa_config_exit(spa_t *spa, int locks, void *tag); +extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw); +extern void spa_config_exit(spa_t *spa, int locks, const void *tag); extern int spa_config_held(spa_t *spa, int locks, krw_t rw); /* Pool vdev add/remove lock */ @@ -1091,7 +1091,6 @@ extern boolean_t spa_has_checkpoint(spa_t *spa); extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa); extern boolean_t spa_suspend_async_destroy(spa_t *spa); extern uint64_t spa_min_claim_txg(spa_t *spa); -extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp); extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp); typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size, diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index bcaa6d38753..a7e46d3790a 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -121,7 +121,7 @@ zfs_refcount_count(zfs_refcount_t *rc) } int64_t -zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { reference_t *ref = NULL; int64_t count; @@ -143,13 +143,14 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_add(zfs_refcount_t *rc, void *holder) +zfs_refcount_add(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_add_many(rc, 1, holder)); } int64_t 
-zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, + const void *holder) { reference_t *ref; int64_t count; @@ -197,7 +198,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_remove(zfs_refcount_t *rc, void *holder) +zfs_refcount_remove(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_remove_many(rc, 1, holder)); } @@ -235,7 +236,7 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) void zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, - void *current_holder, void *new_holder) + const void *current_holder, const void *new_holder) { reference_t *ref; boolean_t found = B_FALSE; @@ -260,8 +261,8 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, } void -zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, - void *new_holder) +zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder, + const void *new_holder) { return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder, new_holder)); @@ -273,7 +274,7 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, * might be held. */ boolean_t -zfs_refcount_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; @@ -301,7 +302,7 @@ zfs_refcount_held(zfs_refcount_t *rc, void *holder) * since the reference might not be held. 
*/ boolean_t -zfs_refcount_not_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 185b7020148..ecdb3c61519 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -484,7 +484,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) +spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) { int wlocks_held = 0; @@ -517,7 +517,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_exit(spa_t *spa, int locks, void *tag) +spa_config_exit(spa_t *spa, int locks, const void *tag) { for (int i = SCL_LOCKS - 1; i >= 0; i--) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 5638f531938..aac0392a4ad 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -881,8 +881,8 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) return (zio_null(NULL, spa, NULL, done, private, flags)); } -void -zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) +static void +zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held) { if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) { zfs_panic_recover("blkptr at %p has invalid TYPE %llu", @@ -921,6 +921,10 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) if (!spa->spa_trust_config) return; + if (!config_held) + spa_config_enter(spa, SCL_VDEV, bp, RW_READER); + else + ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER)); /* * Pool-specific checks. 
* @@ -969,6 +973,8 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) bp, i, (longlong_t)offset); } } + if (!config_held) + spa_config_exit(spa, SCL_VDEV, bp); } boolean_t @@ -1008,7 +1014,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, data, size, size, done, private, @@ -1101,7 +1107,7 @@ void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, B_FALSE); /* * The check for EMBEDDED is a performance optimization. We @@ -1166,7 +1172,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); if (BP_IS_EMBEDDED(bp)) return (zio_null(pio, spa, NULL, NULL, NULL, 0)); From 9d4ca81b6f7b17dd44732ce93180ccc991b8de5f Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Thu, 15 Aug 2019 10:27:13 -0400 Subject: [PATCH 167/325] Make txg_wait_synced conditional in zfsvfs_teardown The call to txg_wait_synced in zfsvfs_teardown should be made conditional on the objset having dirty data. This can prevent unnecessary txg_wait_synced during some unmount operations. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Paul Zuchowski Closes #9115 --- module/zfs/zfs_vfsops.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 489f12b7fc0..6348cac7dcc 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1785,8 +1785,17 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * Evict cached data. We must write out any dirty data before * disowning the dataset. 
*/ - if (!zfs_is_readonly(zfsvfs)) + objset_t *os = zfsvfs->z_os; + boolean_t os_dirty = B_FALSE; + for (int t = 0; t < TXG_SIZE; t++) { + if (dmu_objset_is_dirty(os, t)) { + os_dirty = B_TRUE; + break; + } + } + if (!zfs_is_readonly(zfsvfs) && os_dirty) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + } dmu_objset_evict_dbufs(zfsvfs->z_os); return (0); From dd6d0bdbb32a4dcb8aee9f5f603fdfde84fb7170 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 15 Aug 2019 07:44:57 -0700 Subject: [PATCH 168/325] Assert that a dnode's bonuslen never exceeds its recorded size This patch introduces an assertion that can catch pitfalls in development where there is a mismatch between the size of reads and writes between a *_phys structure and its respective in-core structure when bonus buffers are used. This debugging-aid should be complementary to the verification done by ztest in ztest_verify_dnode_bt(). A side to this patch is that we now clear out any extra bytes past a bonus buffer's new size when the buffer is shrinking. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Signed-off-by: Serapheim Dimitropoulos Closes #8348 --- module/zfs/dbuf.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ module/zfs/dnode.c | 8 ++++++++ 2 files changed, 52 insertions(+) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 94c49b3ef0a..6cd7ae9567f 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -3785,6 +3785,46 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) zio_nowait(zio); } +#ifdef ZFS_DEBUG +/* + * Verify that the size of the data in our bonus buffer does not exceed + * its recorded size. + * + * The purpose of this verification is to catch any cases in development + * where the size of a phys structure (i.e space_map_phys_t) grows and, + * due to incorrect feature management, older pools expect to read more + * data even though they didn't actually write it to begin with. 
+ * + * For a example, this would catch an error in the feature logic where we + * open an older pool and we expect to write the space map histogram of + * a space map with size SPACE_MAP_SIZE_V0. + */ +static void +dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr) +{ + dnode_t *dn = DB_DNODE(dr->dr_dbuf); + + /* + * Encrypted bonus buffers can have data past their bonuslen. + * Skip the verification of these blocks. + */ + if (DMU_OT_IS_ENCRYPTED(dn->dn_bonustype)) + return; + + uint16_t bonuslen = dn->dn_phys->dn_bonuslen; + uint16_t maxbonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots); + ASSERT3U(bonuslen, <=, maxbonuslen); + + arc_buf_t *datap = dr->dt.dl.dr_data; + char *datap_end = ((char *)datap) + bonuslen; + char *datap_max = ((char *)datap) + maxbonuslen; + + /* ensure that everything is zero after our data */ + for (; datap_end < datap_max; datap_end++) + ASSERT(*datap_end == 0); +} +#endif + /* * dbuf_sync_leaf() is called recursively from dbuf_sync_list() so it is * critical the we not allow the compiler to inline this function in to @@ -3861,6 +3901,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) DN_MAX_BONUS_LEN(dn->dn_phys)); DB_DNODE_EXIT(db); +#ifdef ZFS_DEBUG + dbuf_sync_leaf_verify_bonus_dnode(dr); +#endif + if (*datap != db->db.db_data) { int slots = DB_DNODE(db)->dn_num_slots; int bonuslen = DN_SLOTS_TO_BONUSLEN(slots); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index cc7bc5ec82c..097eaf3ee6f 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -389,6 +389,14 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx) rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ASSERT3U(newsize, <=, DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) - (dn->dn_nblkptr-1) * sizeof (blkptr_t)); + + if (newsize < dn->dn_bonuslen) { + /* clear any data after the end of the new size */ + size_t diff = dn->dn_bonuslen - newsize; + char *data_end = ((char *)dn->dn_bonus->db.db_data) + newsize; + bzero(data_end, diff); + } + dn->dn_bonuslen = 
newsize; if (newsize == 0) dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN; From b78d32cc259d588947c7da9ac269bdba9620142a Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Thu, 15 Aug 2019 17:36:24 -0600 Subject: [PATCH 169/325] Improve write performance by using dmu_read_by_dnode() In zfs_log_write(), we can use dmu_read_by_dnode() rather than dmu_read() thus avoiding unnecessary dnode_hold() calls. We get a 2-5% performance gain for large sequential_writes tests, >=128K writes to files with recordsize=8K. Testing done on Ubuntu 18.04 with 4.15 kernel, 8vCPUs and SSD storage on VMware ESX. Reviewed-by: Brian Behlendorf Signed-off-by: Tony Nguyen Closes #9156 --- module/zfs/zfs_log.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 622ce08acd2..41b663b65fb 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -510,6 +511,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t resid, int ioflag, zil_callback_t callback, void *callback_data) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); uint32_t blocksize = zp->z_blksz; itx_wr_state_t write_state; uintptr_t fsync_cnt; @@ -556,13 +558,16 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx = zil_itx_create(txtype, sizeof (*lr) + (wr_state == WR_COPIED ? 
len : 0)); lr = (lr_write_t *)&itx->itx_lr; - if (wr_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os, - zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { + + DB_DNODE_ENTER(db); + if (wr_state == WR_COPIED && dmu_read_by_dnode(DB_DNODE(db), + off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { zil_itx_destroy(itx); itx = zil_itx_create(txtype, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; wr_state = WR_NEED_COPY; } + DB_DNODE_EXIT(db); itx->itx_wr_state = wr_state; lr->lr_foid = zp->z_id; From dfa4d3d986ad19584e985ae09bd2c2cf8c7606ab Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 15 Aug 2019 16:53:53 -0700 Subject: [PATCH 170/325] dmu_tx_wait() hang likely due to cv_signal() in dsl_pool_dirty_delta() Even though the bug's writeup (Github issue #9136) is very detailed, we still don't know exactly how we got to that state, thus I wasn't able to reproduce the bug. That said, we can make an educated guess combining the information on filled issue with the code. From the fact that `dp_dirty_total` was 0 (which is less than `zfs_dirty_data_max`) we know that there was one thread that set it to 0 and then signaled one of the waiters of `dp_spaceavail_cv` [see `dsl_pool_dirty_delta()` which is also the only place that `dp_dirty_total` is changed]. Thus, the only logical explaination then for the bug being hit is that the waiter that just got awaken didn't go through `dsl_pool_dirty_data()`. Given that this function is only called by `dsl_pool_dirty_space()` or `dsl_pool_undirty_space()` I can only think of two possible ways of the above scenario happening: [1] The waiter didn't call into any of the two functions - which I find highly unlikely (i.e. why wait on `dp_spaceavail_cv` to begin with?). 
[2] The waiter did call in one of the above function but it passed 0 as the space/delta to be dirtied (or undirtied) and then the callee returned immediately (e.g both `dsl_pool_dirty_space()` and `dsl_pool_undirty_space()` return immediately when space is 0). In any case and no matter how we got there, the easy fix would be to just broadcast to all waiters whenever `dp_dirty_total` hits 0. That said and given that we've never hit this before, it would make sense to think more on why the above situation occured. Attempting to mimic what Prakash was doing in the issue filed, I created a dataset with `sync=always` and started doing contiguous writes in a file within that dataset. I observed with DTrace that even though we update the pool's dirty data accounting when we would dirty stuff, the accounting wouldn't be decremented incrementally as we were done with the ZIOs of those writes (the reason being that `dbuf_write_physdone()` isn't be called as we go through the override code paths, and thus `dsl_pool_undirty_space()` is never called). As a result we'd have to wait until we get to `dsl_pool_sync()` where we zero out all dirty data accounting for the pool and the current TXG's metadata. In addition, as Matt noted and I later verified, the same issue would arise when using dedup. In both cases (sync & dedup) we shouldn't have to wait until `dsl_pool_sync()` zeros out the accounting data. According to the comment in that part of the code, the reasons why we do the zeroing, have nothing to do with what we observe: ```` /* * We have written all of the accounted dirty data, so our * dp_space_towrite should now be zero. However, some seldom-used * code paths do not adhere to this (e.g. dbuf_undirty(), also * rounding error in dbuf_write_physdone). * Shore up the accounting of any dirtied space now. 
*/ dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); ```` Ideally what we want to do is to undirty in the accounting exactly what we dirty (I use the word ideally as we can still have rounding errors). This would make the behavior of the system more clear and predictable. Another interesting issue that I observed with DTrace was that we wouldn't update any of the pool's dirty data accounting whenever we would dirty and/or undirty MOS data. In addition, every time we would change the size of a dbuf through `dbuf_new_size()` we wouldn't update the accounted space dirtied in the appropriate dirty record, so when ZIOs are done we would undirty less that we dirtied from the pool's accounting point of view. For the first two issues observed (sync & dedup) this patch ensures that we still update the pool's accounting when we undirty data, regardless of the write being physical or not. For changes in the MOS, we first ensure to zero out the pool's dirty data accounting in `dsl_pool_sync()` after we synced the MOS. Then we can go ahead and enable the update of the pool's dirty data accounting wheneve we change MOS data. Another fix is that we now update the accounting explicitly for counting errors in `dbuf_write_done()`. Finally, `dbuf_new_size()` updates the accounted space of the appropriate dirty record correctly now. The problem is that we still don't know how the bug came up in the issue filled. That said the issues fixed seem to be very relevant, so instead of going with the broadcasting solution right away, I decided to leave this patch as is. 
Reviewed-by: Brian Behlendorf Reviewed-by: Prakash Surya Signed-off-by: Serapheim Dimitropoulos External-issue: DLPX-47285 Closes #9137 --- module/zfs/dbuf.c | 34 +++++++++++++++++++++++++++++----- module/zfs/dmu.c | 3 +++ module/zfs/dmu_objset.c | 17 +++++++++++++---- module/zfs/dsl_pool.c | 24 +++++++++++++++--------- 4 files changed, 60 insertions(+), 18 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 6cd7ae9567f..2bc995ac66c 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1826,9 +1826,11 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) db->db.db_size = size; if (db->db_level == 0) { - ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); db->db_last_dirty->dt.dl.dr_data = buf; } + ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); + ASSERT3U(db->db_last_dirty->dr_accounted, ==, osize); + db->db_last_dirty->dr_accounted = size; mutex_exit(&db->db_mtx); dmu_objset_willuse_space(dn->dn_objset, size - osize, tx); @@ -2041,7 +2043,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); } - if (db->db_blkid != DMU_BONUS_BLKID && os->os_dsl_dataset != NULL) + if (db->db_blkid != DMU_BONUS_BLKID) dr->dr_accounted = db->db.db_size; dr->dr_dbuf = db; dr->dr_txg = tx->tx_txg; @@ -4210,8 +4212,7 @@ dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg) /* * The callback will be called io_phys_children times. Retire one * portion of our dirty space each time we are called. Any rounding - * error will be cleaned up by dsl_pool_sync()'s call to - * dsl_pool_undirty_space(). + * error will be cleaned up by dbuf_write_done(). 
*/ delta = dr->dr_accounted / zio->io_phys_children; dsl_pool_undirty_space(dp, delta, zio->io_txg); @@ -4294,13 +4295,36 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); cv_broadcast(&db->db_changed); ASSERT(db->db_dirtycnt > 0); db->db_dirtycnt -= 1; db->db_data_pending = NULL; dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE); + + /* + * If we didn't do a physical write in this ZIO and we + * still ended up here, it means that the space of the + * dbuf that we just released (and undirtied) above hasn't + * been marked as undirtied in the pool's accounting. + * + * Thus, we undirty that space in the pool's view of the + * world here. For physical writes this type of update + * happens in dbuf_write_physdone(). + * + * If we did a physical write, cleanup any rounding errors + * that came up due to writing multiple copies of a block + * on disk [see dbuf_write_physdone()]. + */ + if (zio->io_phys_children == 0) { + dsl_pool_undirty_space(dmu_objset_pool(os), + dr->dr_accounted, zio->io_txg); + } else { + dsl_pool_undirty_space(dmu_objset_pool(os), + dr->dr_accounted % zio->io_phys_children, zio->io_txg); + } + + kmem_free(dr, sizeof (dbuf_dirty_record_t)); } static void diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b4131d91781..a086f5ca6d9 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1098,6 +1098,9 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_buf_rele_array(dbp, numbufs, FTAG); } +/* + * Note: Lustre is an external consumer of this interface. 
+ */ void dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx) diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 29ed45a55dc..88e97e1a310 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -3002,9 +3002,17 @@ dmu_fsname(const char *snapname, char *buf) } /* - * Call when we think we're going to write/free space in open context to track - * the amount of dirty data in the open txg, which is also the amount - * of memory that can not be evicted until this txg syncs. + * Call when we think we're going to write/free space in open context + * to track the amount of dirty data in the open txg, which is also the + * amount of memory that can not be evicted until this txg syncs. + * + * Note that there are two conditions where this can be called from + * syncing context: + * + * [1] When we just created the dataset, in which case we go on with + * updating any accounting of dirty data as usual. + * [2] When we are dirtying MOS data, in which case we only update the + * pool's accounting of dirty data. */ void dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx) @@ -3014,8 +3022,9 @@ dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx) if (ds != NULL) { dsl_dir_willuse_space(ds->ds_dir, aspace, tx); - dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx); } + + dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx); } #if defined(_KERNEL) diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index ead755007dd..d8cf4d209e1 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -659,15 +659,6 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) } VERIFY0(zio_wait(zio)); - /* - * We have written all of the accounted dirty data, so our - * dp_space_towrite should now be zero. However, some seldom-used - * code paths do not adhere to this (e.g. dbuf_undirty(), also - * rounding error in dbuf_write_physdone). - * Shore up the accounting of any dirtied space now. 
- */ - dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); - /* * Update the long range free counter after * we're done syncing user data @@ -762,6 +753,21 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) dsl_pool_sync_mos(dp, tx); } + /* + * We have written all of the accounted dirty data, so our + * dp_space_towrite should now be zero. However, some seldom-used + * code paths do not adhere to this (e.g. dbuf_undirty()). Shore up + * the accounting of any dirtied space now. + * + * Note that, besides any dirty data from datasets, the amount of + * dirty data in the MOS is also accounted by the pool. Therefore, + * we want to do this cleanup after dsl_pool_sync_mos() so we don't + * attempt to update the accounting for the same dirty data twice. + * (i.e. at this point we only update the accounting for the space + * that we know that we "leaked"). + */ + dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); + /* * If we modify a dataset in the same txg that we want to destroy it, * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. From c75d3968bd4f2d6efeb7d1739fc5dde661b34f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Fri, 16 Aug 2019 17:02:32 +0200 Subject: [PATCH 171/325] initramfs: fixes for (debian) initramfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * contrib/initramfs: include /etc/default/zfs and /etc/zfs/zfs-functions At least debian needs /etc/default/zfs and /etc/zfs/zfs-functions for its initramfs. Include both in build when initramfs is configured. * contrib/initramfs: include 60-zvol.rules and zvol_id Include 60-zvol.rules and zvol_id and set udev as predependency instead of debians zdev. This makes debians additional zdev hook unneeded. * Correct initconfdir substitution for some distros Not every Linux distro is using @sysconfdir@/default but @initconfdir@ which is already determined by configure. Let's use it. 
* systemd: prevent possible conflict between systemd and sysvinit Systemd will not load a sysvinit service if a unit exists with the same name. This prevents conflicts between sysvinit and systemd. In ZFS there is one sysvinit service that does not have a systemd service but a target counterpart, zfs-import.target. Usually it does not make any sense to install both but it is possible. Let's prevent any conflict by masking zfs-import.service by default. This does not harm even if init.d/zfs-import does not exist. Reviewed-by: Chris Wedgwood Reviewed-by: Brian Behlendorf Tested-by: Alex Ingram Tested-by: Dreamcat4 Signed-off-by: Michael Niewöhner Closes #7904 Closes #9089 --- contrib/initramfs/Makefile.am | 16 +++++++++++----- contrib/initramfs/hooks/zfs.in | 6 ++++-- etc/systemd/system/Makefile.am | 4 ++++ rpm/generic/zfs.spec.in | 13 +++++++++++-- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index fefd676ce0f..52bdeb2afe5 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -6,10 +6,15 @@ initrd_SCRIPTS = \ SUBDIRS = hooks scripts EXTRA_DIST = \ + $(top_srcdir)/etc/init.d/zfs \ + $(top_srcdir)/etc/init.d/zfs-functions \ $(top_srcdir)/contrib/initramfs/conf.d/zfs \ $(top_srcdir)/contrib/initramfs/conf-hooks.d/zfs \ $(top_srcdir)/contrib/initramfs/README.initramfs.markdown +$(top_srcdir)/etc/init.d/zfs $(top_srcdir)/etc/init.d/zfs-functions: + $(MAKE) -C $(top_srcdir)/etc/init.d zfs zfs-functions + install-initrdSCRIPTS: $(EXTRA_DIST) for d in conf.d conf-hooks.d scripts/local-top; do \ $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ @@ -21,8 +26,9 @@ install-initrdSCRIPTS: $(EXTRA_DIST) cp $(top_builddir)/contrib/initramfs/$$d/zfs \ $(DESTDIR)$(initrddir)/$$d/; \ done - if [ -f $(top_builddir)/etc/init.d/zfs ]; then \ - $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ - cp $(top_builddir)/etc/init.d/zfs \ - $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ - fi + $(MKDIR_P) 
$(DESTDIR)$(DEFAULT_INITCONF_DIR); \ + cp $(top_builddir)/etc/init.d/zfs \ + $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ + $(MKDIR_P) $(DESTDIR)$(sysconfdir)/zfs; \ + cp $(top_builddir)/etc/init.d/zfs-functions \ + $(DESTDIR)$(sysconfdir)/zfs/ diff --git a/contrib/initramfs/hooks/zfs.in b/contrib/initramfs/hooks/zfs.in index e35354141d8..15f23c908b2 100755 --- a/contrib/initramfs/hooks/zfs.in +++ b/contrib/initramfs/hooks/zfs.in @@ -4,16 +4,18 @@ # # This hook installs udev rules for ZoL. -PREREQ="zdev" +PREREQ="udev" # These prerequisites are provided by the zfsutils package. The zdb utility is # not strictly required, but it can be useful at the initramfs recovery prompt. COPY_EXEC_LIST="@sbindir@/zdb @sbindir@/zpool @sbindir@/zfs" COPY_EXEC_LIST="$COPY_EXEC_LIST @mounthelperdir@/mount.zfs @udevdir@/vdev_id" +COPY_EXEC_LIST="$COPY_EXEC_LIST @udevdir@/zvol_id" COPY_FILE_LIST="/etc/hostid @sysconfdir@/zfs/zpool.cache" -COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/default/zfs" +COPY_FILE_LIST="$COPY_FILE_LIST @DEFAULT_INITCONF_DIR@/zfs" COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/zfs/zfs-functions" COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/zfs/vdev_id.conf" +COPY_FILE_LIST="$COPY_FILE_LIST @udevruledir@/60-zvol.rules" COPY_FILE_LIST="$COPY_FILE_LIST @udevruledir@/69-vdev.rules" # These prerequisites are provided by the base system. 
diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index 9249f15eb45..ba73f558a8a 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -31,5 +31,9 @@ $(systemdunit_DATA) $(systemdpreset_DATA):%:%.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' +install-data-hook: + $(MKDIR_P) "$(DESTDIR)$(systemdunitdir)" + ln -s /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-import.service" + distclean-local:: -$(RM) $(systemdunit_DATA) $(systemdpreset_DATA) diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index b4a79371672..5ef6f7bcfa3 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -433,6 +433,14 @@ systemctl --system daemon-reload >/dev/null || true %{_udevdir}/vdev_id %{_udevdir}/zvol_id %{_udevdir}/rules.d/* +%if ! 0%{?_systemd} || 0%{?_initramfs} +# Files needed for sysvinit and initramfs-tools +%{_sysconfdir}/%{name}/zfs-functions +%config(noreplace) %{_initconfdir}/zfs +%else +%exclude %{_sysconfdir}/%{name}/zfs-functions +%exclude %{_initconfdir}/zfs +%endif %if 0%{?_systemd} %{_unitdir}/* %{_presetdir}/* @@ -440,9 +448,10 @@ systemctl --system daemon-reload >/dev/null || true %{_systemdgeneratordir}/* %else %config(noreplace) %{_sysconfdir}/init.d/* -%config(noreplace) %{_initconfdir}/zfs %endif -%config(noreplace) %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/zed.d/* +%config(noreplace) %{_sysconfdir}/%{name}/zpool.d/* +%config(noreplace) %{_sysconfdir}/%{name}/vdev_id.conf.*.example %attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/* %files -n libzpool2 From 72dbc01e7f2a0346625bb01b64e96637916e88c7 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Mon, 19 Aug 2019 15:06:53 -0700 Subject: [PATCH 172/325] Add more refquota tests It used to be possible for zfs receive (and other operations related to clone swap) to bypass refquotas. This can cause a number of issues, and there should be an automated test for it. 
Added tests for rollback and receive not overriding refquota. Reviewed-by: Pavel Zakharov Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Paul Dagnelie Closes #9139 --- tests/runfiles/linux.run | 3 +- .../tests/functional/refquota/Makefile.am | 4 +- .../functional/refquota/refquota_007_neg.ksh | 61 ++++++++++++++++ .../functional/refquota/refquota_008_neg.ksh | 71 +++++++++++++++++++ 4 files changed, 137 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh create mode 100755 tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index d02e15e263d..1b81424c305 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -750,7 +750,8 @@ tags = ['functional', 'redundancy'] [tests/functional/refquota] tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos', - 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg'] + 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg', + 'refquota_007_neg', 'refquota_008_neg'] tags = ['functional', 'refquota'] [tests/functional/refreserv] diff --git a/tests/zfs-tests/tests/functional/refquota/Makefile.am b/tests/zfs-tests/tests/functional/refquota/Makefile.am index 5f7c7b68690..1d8418fbbec 100644 --- a/tests/zfs-tests/tests/functional/refquota/Makefile.am +++ b/tests/zfs-tests/tests/functional/refquota/Makefile.am @@ -7,4 +7,6 @@ dist_pkgdata_SCRIPTS = \ refquota_003_pos.ksh \ refquota_004_pos.ksh \ refquota_005_pos.ksh \ - refquota_006_neg.ksh + refquota_006_neg.ksh \ + refquota_007_neg.ksh \ + refquota_008_neg.ksh diff --git a/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh b/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh new file mode 100755 index 00000000000..e2141c7d7f3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh @@ -0,0 +1,61 @@ +#!/bin/ksh +# +# CDDL HEADER START +# +# This 
file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. + +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# refquota limits the amount of space a dataset can consume, +# snapshot rollback should be limited by refquota. +# +# STRATEGY: +# 1. Create a file in a filesystem +# 2. Create a snapshot of the filesystem +# 3. Remove the file +# 4. Set a refquota of size half of the file +# 5. Rollback the filesystem from the snapshot +# 6. Rollback should fail +# + +verify_runnable "both" + +function cleanup +{ + log_must $ZFS destroy -rf $TESTPOOL/$TESTFS + log_must $ZFS create $TESTPOOL/$TESTFS + log_must $ZFS set mountpoint=$TESTDIR $TESTPOOL/$TESTFS +} + +log_onexit cleanup + +TESTFILE='testfile' +FS=$TESTPOOL/$TESTFS + +mntpnt=$(get_prop mountpoint $FS) +log_must mkfile 20M $mntpnt/$TESTFILE +log_must zfs snapshot $FS@snap20M +log_must rm $mntpnt/$TESTFILE + +log_must sync + +log_must zfs set refquota=10M $FS +log_mustnot zfs rollback $FS@snap20M + +log_pass "The rollback to the snapshot was restricted by refquota." diff --git a/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh b/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh new file mode 100755 index 00000000000..e7f40ec7176 --- /dev/null +++ b/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. + +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# refquota limits the amount of space a dataset can consume, +# This test verifies that zfs receive does not override +# refquota. +# +# STRATEGY: +# 1. Create a sub-filesystem $TESTSUBFS1 +# 2. Create a file in the sub-filesystem $TESTSUBFS1 +# 3. Create a snapshot of the sub-filesystem $TESTSUBFS1 +# 4. Create another sub-filesystem $TESTSUBFS2 +# 5. Apply a refquota value to $TESTSUBFS2, +# half the sub-filesystem $TESTSUBFS1 file size +# 6. Verify that zfs receive of the snapshot of $TESTSUBFS1 +# fails due to refquota +# + +verify_runnable "both" + +oldvalue=$(get_tunable spa_asize_inflation) +function cleanup +{ + set_tunable32 spa_asize_inflation $oldvalue + log_must zfs destroy -rf $TESTPOOL/$TESTFS + log_must zfs create $TESTPOOL/$TESTFS + log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS +} + +log_onexit cleanup + +set_tunable32 spa_asize_inflation 2 + +TESTFILE='testfile' +FS=$TESTPOOL/$TESTFS +log_must zfs create $FS/$TESTSUBFS1 +log_must zfs create $FS/$TESTSUBFS2 + +mntpnt1=$(get_prop mountpoint $FS/$TESTSUBFS1) +mntpnt2=$(get_prop mountpoint $FS/$TESTSUBFS2) + +log_must mkfile 200M $mntpnt1/$TESTFILE +log_must zfs snapshot $FS/$TESTSUBFS1@snap200m + +log_must zfs set refquota=10M $FS/$TESTSUBFS2 +log_mustnot eval "zfs send $FS/$TESTSUBFS1@snap200m |" \ + "zfs receive -F $FS/$TESTSUBFS2" + +log_pass "ZFS receive does not override refquota" + From ed235deffd2bedb0246b5e272062e3cab2fec86a Mon Sep 17 00:00:00 2001 From: colmbuckley Date: Mon, 19 Aug 2019 23:11:47 +0100 Subject: [PATCH 173/325] Set "none" scheduler if available (initramfs) 
Existing zfs initramfs script logic will attempt to set the 'noop' scheduler if it's available on the vdev block devices. Newer kernels have the similar 'none' scheduler on multiqueue devices; this change alters the initramfs script logic to also attempt to set this scheduler if it's available. Reviewed-by: Brian Behlendorf Reviewed-by: Garrett Fields Reviewed-by: Richard Laager Signed-off-by: Colm Buckley Closes #9042 --- contrib/initramfs/scripts/zfs.in | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 05410ea2bdc..9e90d76bb11 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -884,20 +884,27 @@ mountroot() ZFS_RPOOL="${pool}" fi - # Set elevator=noop on the root pool's vdevs' disks. ZFS already - # does this for wholedisk vdevs (for all pools), so this is only - # important for partitions. + # Set the no-op scheduler on the disks containing the vdevs of + # the root pool. For single-queue devices, this scheduler is + # "noop", for multi-queue devices, it is "none". + # ZFS already does this for wholedisk vdevs (for all pools), so this + # is only important for partitions. 
"${ZPOOL}" status -L "${ZFS_RPOOL}" 2> /dev/null | awk '/^\t / && !/(mirror|raidz)/ { dev=$1; sub(/[0-9]+$/, "", dev); print dev }' | - while read i + while read -r i do - if grep -sq noop /sys/block/$i/queue/scheduler + SCHEDULER=/sys/block/$i/queue/scheduler + if [ -e "${SCHEDULER}" ] then - echo noop > "/sys/block/$i/queue/scheduler" + # Query to see what schedulers are available + case "$(cat "${SCHEDULER}")" in + *noop*) echo noop > "${SCHEDULER}" ;; + *none*) echo none > "${SCHEDULER}" ;; + esac fi done From db58aa717b022e0096cd0c8ffe1ee3a008821fee Mon Sep 17 00:00:00 2001 From: jdike <52420226+jdike@users.noreply.github.com> Date: Mon, 19 Aug 2019 19:04:26 -0400 Subject: [PATCH 174/325] Fix lockdep circular locking false positive involving sa_lock There are two different deadlock scenarios, but they share a common link, which is thread 1 holding sa_lock and trying to get zap->zap_rwlock: zap_lockdir_impl+0x858/0x16c0 [zfs] zap_lockdir+0xd2/0x100 [zfs] zap_lookup_norm+0x7f/0x100 [zfs] zap_lookup+0x12/0x20 [zfs] sa_setup+0x902/0x1380 [zfs] zfsvfs_init+0x3d6/0xb20 [zfs] zfsvfs_create+0x5dd/0x900 [zfs] zfs_domount+0xa3/0xe20 [zfs] and thread 2 trying to get sa_lock, either in sa_setup: sa_setup+0x742/0x1380 [zfs] zfsvfs_init+0x3d6/0xb20 [zfs] zfsvfs_create+0x5dd/0x900 [zfs] zfs_domount+0xa3/0xe20 [zfs] or in sa_build_index: sa_build_index+0x13d/0x790 [zfs] sa_handle_get_from_db+0x368/0x500 [zfs] zfs_znode_sa_init.isra.0+0x24b/0x330 [zfs] zfs_znode_alloc+0x3da/0x1a40 [zfs] zfs_zget+0x39a/0x6e0 [zfs] zfs_root+0x101/0x160 [zfs] zfs_domount+0x91f/0xea0 [zfs] From there, there are different locking paths back to something holding zap->zap_rwlock. The deadlock scenarios involve multiple different ZFS filesystems being mounted. sa_lock is common to these scenarios, and the sa struct involved is private to a mount. Therefore, these must be referring to different sa_lock instances and these deadlocks can't occur in practice. 
The fix, from Brian Behlendorf, is to remove sa_lock from lockdep coverage by initializing it with MUTEX_NOLOCKDEP. Reviewed-by: Brian Behlendorf Signed-off-by: Jeff Dike Closes #9110 --- module/zfs/sa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 4999fef345d..f718e7662e6 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1014,7 +1014,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, } sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); - mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&sa->sa_lock, NULL, MUTEX_NOLOCKDEP, NULL); sa->sa_master_obj = sa_obj; os->os_sa = sa; From dcaa460d6d5afd66f9e9d24c113bfe984c2b34c9 Mon Sep 17 00:00:00 2001 From: Alexey Smirnoff Date: Tue, 20 Aug 2019 23:26:19 +0000 Subject: [PATCH 175/325] zfs-functions.in: in_mtab() always returns 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $fs used with the wrong sed command where should be $mntpnt instead to match a variable exported by read_mtab() The fix is mostly to reuse the sed command found in read_mtab() Reviewed-by: Brian Behlendorf Reviewed-by: Michael Niewöhner Signed-off-by: Alexey Smirnoff Closes #9168 --- etc/init.d/zfs-functions.in | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/etc/init.d/zfs-functions.in b/etc/init.d/zfs-functions.in index cbc7fd22a0a..d65c79dcfd3 100644 --- a/etc/init.d/zfs-functions.in +++ b/etc/init.d/zfs-functions.in @@ -366,10 +366,13 @@ read_mtab() in_mtab() { - local fs="$(echo "$1" | sed 's,/,_,g')" + local mntpnt="$1" + # Remove 'unwanted' characters. + mntpnt=$(printf '%b\n' "$mntpnt" | sed -e 's,/,,g' \ + -e 's,-,,g' -e 's,\.,,g' -e 's, ,,g') local var - var="$(eval echo MTAB_$fs)" + var="$(eval echo MTAB_$mntpnt)" [ "$(eval echo "$""$var")" != "" ] return "$?" 
} From b7c9207fbd0d39f321b1681ac84aa98ef8dfc22b Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Wed, 21 Aug 2019 12:01:59 -0400 Subject: [PATCH 176/325] Minor cleanup in Makefile.am Split long lines where adding license info to dist archive. Remove extra colon from target line. Reviewed-by: Chris Dunlop Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9189 --- Makefile.am | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Makefile.am b/Makefile.am index 9afe2295410..da4f6407d18 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,11 +25,16 @@ EXTRA_DIST += META AUTHORS COPYRIGHT LICENSE NEWS NOTICE README.md EXTRA_DIST += CODE_OF_CONDUCT.md # Include all the extra licensing information for modules -EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE module/icp/algs/skein/THIRDPARTYLICENSE.descrip -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip -EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2 module/spl/THIRDPARTYLICENSE.gplv2.descrip -EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash module/zfs/THIRDPARTYLICENSE.cityhash.descrip +EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE +EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE.descrip +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip +EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2 +EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2.descrip +EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash +EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash.descrip @CODE_COVERAGE_RULES@ @@ -39,7 +44,7 @@ gitrev: BUILT_SOURCES = 
gitrev -distclean-local:: +distclean-local: -$(RM) -R autom4te*.cache -find . \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS \ -o -name .pc -o -name .hg -o -name .git \) -prune -o \ From b72548575e12b4d88ec917a6097d971673fec536 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 22 Aug 2019 08:53:44 -0700 Subject: [PATCH 177/325] ZTS: Fix vdev_zaps_005_pos on CentOS 6 The ancient version of blkid (v2.17.2) used in CentOS 6 will not detect the newly created pool unless it has been written to. Force a pool sync so `zpool import` will detect the newly created pool. Reviewed-by: John Kennedy Signed-off-by: Brian Behlendorf Closes #9199 --- tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh b/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh index 8cf8e6d4055..066be917e43 100755 --- a/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh @@ -41,6 +41,7 @@ orig_top=$(get_top_vd_zap $DISK $conf) orig_leaf=$(get_leaf_vd_zap $DISK $conf) assert_zap_common $TESTPOOL $DISK "top" $orig_top assert_zap_common $TESTPOOL $DISK "leaf" $orig_leaf +log_must zpool sync # Export the pool. log_must zpool export $TESTPOOL From f64ef7317a134232aefbf8ed66bb246265373cd8 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 22 Aug 2019 12:44:11 -0400 Subject: [PATCH 178/325] Enhance ioctl number checks When checking ZFS_IOC_* numbers, print which numbers are wrong rather than silently failing. 
Reviewed-by: Chris Dunlop Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9187 --- .../libzfs_input_check/libzfs_input_check.c | 182 ++++++++++-------- 1 file changed, 97 insertions(+), 85 deletions(-) diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c index 977b9e2f3dd..1b949c7e232 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c @@ -827,91 +827,103 @@ enum zfs_ioc_ref { boolean_t validate_ioc_values(void) { - return ( - ZFS_IOC_BASE + 0 == ZFS_IOC_POOL_CREATE && - ZFS_IOC_BASE + 1 == ZFS_IOC_POOL_DESTROY && - ZFS_IOC_BASE + 2 == ZFS_IOC_POOL_IMPORT && - ZFS_IOC_BASE + 3 == ZFS_IOC_POOL_EXPORT && - ZFS_IOC_BASE + 4 == ZFS_IOC_POOL_CONFIGS && - ZFS_IOC_BASE + 5 == ZFS_IOC_POOL_STATS && - ZFS_IOC_BASE + 6 == ZFS_IOC_POOL_TRYIMPORT && - ZFS_IOC_BASE + 7 == ZFS_IOC_POOL_SCAN && - ZFS_IOC_BASE + 8 == ZFS_IOC_POOL_FREEZE && - ZFS_IOC_BASE + 9 == ZFS_IOC_POOL_UPGRADE && - ZFS_IOC_BASE + 10 == ZFS_IOC_POOL_GET_HISTORY && - ZFS_IOC_BASE + 11 == ZFS_IOC_VDEV_ADD && - ZFS_IOC_BASE + 12 == ZFS_IOC_VDEV_REMOVE && - ZFS_IOC_BASE + 13 == ZFS_IOC_VDEV_SET_STATE && - ZFS_IOC_BASE + 14 == ZFS_IOC_VDEV_ATTACH && - ZFS_IOC_BASE + 15 == ZFS_IOC_VDEV_DETACH && - ZFS_IOC_BASE + 16 == ZFS_IOC_VDEV_SETPATH && - ZFS_IOC_BASE + 17 == ZFS_IOC_VDEV_SETFRU && - ZFS_IOC_BASE + 18 == ZFS_IOC_OBJSET_STATS && - ZFS_IOC_BASE + 19 == ZFS_IOC_OBJSET_ZPLPROPS && - ZFS_IOC_BASE + 20 == ZFS_IOC_DATASET_LIST_NEXT && - ZFS_IOC_BASE + 21 == ZFS_IOC_SNAPSHOT_LIST_NEXT && - ZFS_IOC_BASE + 22 == ZFS_IOC_SET_PROP && - ZFS_IOC_BASE + 23 == ZFS_IOC_CREATE && - ZFS_IOC_BASE + 24 == ZFS_IOC_DESTROY && - ZFS_IOC_BASE + 25 == ZFS_IOC_ROLLBACK && - ZFS_IOC_BASE + 26 == ZFS_IOC_RENAME && - ZFS_IOC_BASE + 27 == ZFS_IOC_RECV && - ZFS_IOC_BASE + 28 == ZFS_IOC_SEND && - ZFS_IOC_BASE + 29 == ZFS_IOC_INJECT_FAULT && - ZFS_IOC_BASE + 30 
== ZFS_IOC_CLEAR_FAULT && - ZFS_IOC_BASE + 31 == ZFS_IOC_INJECT_LIST_NEXT && - ZFS_IOC_BASE + 32 == ZFS_IOC_ERROR_LOG && - ZFS_IOC_BASE + 33 == ZFS_IOC_CLEAR && - ZFS_IOC_BASE + 34 == ZFS_IOC_PROMOTE && - ZFS_IOC_BASE + 35 == ZFS_IOC_SNAPSHOT && - ZFS_IOC_BASE + 36 == ZFS_IOC_DSOBJ_TO_DSNAME && - ZFS_IOC_BASE + 37 == ZFS_IOC_OBJ_TO_PATH && - ZFS_IOC_BASE + 38 == ZFS_IOC_POOL_SET_PROPS && - ZFS_IOC_BASE + 39 == ZFS_IOC_POOL_GET_PROPS && - ZFS_IOC_BASE + 40 == ZFS_IOC_SET_FSACL && - ZFS_IOC_BASE + 41 == ZFS_IOC_GET_FSACL && - ZFS_IOC_BASE + 42 == ZFS_IOC_SHARE && - ZFS_IOC_BASE + 43 == ZFS_IOC_INHERIT_PROP && - ZFS_IOC_BASE + 44 == ZFS_IOC_SMB_ACL && - ZFS_IOC_BASE + 45 == ZFS_IOC_USERSPACE_ONE && - ZFS_IOC_BASE + 46 == ZFS_IOC_USERSPACE_MANY && - ZFS_IOC_BASE + 47 == ZFS_IOC_USERSPACE_UPGRADE && - ZFS_IOC_BASE + 48 == ZFS_IOC_HOLD && - ZFS_IOC_BASE + 49 == ZFS_IOC_RELEASE && - ZFS_IOC_BASE + 50 == ZFS_IOC_GET_HOLDS && - ZFS_IOC_BASE + 51 == ZFS_IOC_OBJSET_RECVD_PROPS && - ZFS_IOC_BASE + 52 == ZFS_IOC_VDEV_SPLIT && - ZFS_IOC_BASE + 53 == ZFS_IOC_NEXT_OBJ && - ZFS_IOC_BASE + 54 == ZFS_IOC_DIFF && - ZFS_IOC_BASE + 55 == ZFS_IOC_TMP_SNAPSHOT && - ZFS_IOC_BASE + 56 == ZFS_IOC_OBJ_TO_STATS && - ZFS_IOC_BASE + 57 == ZFS_IOC_SPACE_WRITTEN && - ZFS_IOC_BASE + 58 == ZFS_IOC_SPACE_SNAPS && - ZFS_IOC_BASE + 59 == ZFS_IOC_DESTROY_SNAPS && - ZFS_IOC_BASE + 60 == ZFS_IOC_POOL_REGUID && - ZFS_IOC_BASE + 61 == ZFS_IOC_POOL_REOPEN && - ZFS_IOC_BASE + 62 == ZFS_IOC_SEND_PROGRESS && - ZFS_IOC_BASE + 63 == ZFS_IOC_LOG_HISTORY && - ZFS_IOC_BASE + 64 == ZFS_IOC_SEND_NEW && - ZFS_IOC_BASE + 65 == ZFS_IOC_SEND_SPACE && - ZFS_IOC_BASE + 66 == ZFS_IOC_CLONE && - ZFS_IOC_BASE + 67 == ZFS_IOC_BOOKMARK && - ZFS_IOC_BASE + 68 == ZFS_IOC_GET_BOOKMARKS && - ZFS_IOC_BASE + 69 == ZFS_IOC_DESTROY_BOOKMARKS && - ZFS_IOC_BASE + 70 == ZFS_IOC_RECV_NEW && - ZFS_IOC_BASE + 71 == ZFS_IOC_POOL_SYNC && - ZFS_IOC_BASE + 72 == ZFS_IOC_CHANNEL_PROGRAM && - ZFS_IOC_BASE + 73 == ZFS_IOC_LOAD_KEY && - ZFS_IOC_BASE 
+ 74 == ZFS_IOC_UNLOAD_KEY && - ZFS_IOC_BASE + 75 == ZFS_IOC_CHANGE_KEY && - ZFS_IOC_BASE + 76 == ZFS_IOC_REMAP && - ZFS_IOC_BASE + 77 == ZFS_IOC_POOL_CHECKPOINT && - ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT && - ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE && - ZFS_IOC_BASE + 80 == ZFS_IOC_POOL_TRIM && - LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT && - LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR && - LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK); + boolean_t result = TRUE; + +#define CHECK(expr) do { \ + if (!(expr)) { \ + result = FALSE; \ + fprintf(stderr, "(%s) === FALSE\n", #expr); \ + } \ +} while (0) + + CHECK(ZFS_IOC_BASE + 0 == ZFS_IOC_POOL_CREATE); + CHECK(ZFS_IOC_BASE + 1 == ZFS_IOC_POOL_DESTROY); + CHECK(ZFS_IOC_BASE + 2 == ZFS_IOC_POOL_IMPORT); + CHECK(ZFS_IOC_BASE + 3 == ZFS_IOC_POOL_EXPORT); + CHECK(ZFS_IOC_BASE + 4 == ZFS_IOC_POOL_CONFIGS); + CHECK(ZFS_IOC_BASE + 5 == ZFS_IOC_POOL_STATS); + CHECK(ZFS_IOC_BASE + 6 == ZFS_IOC_POOL_TRYIMPORT); + CHECK(ZFS_IOC_BASE + 7 == ZFS_IOC_POOL_SCAN); + CHECK(ZFS_IOC_BASE + 8 == ZFS_IOC_POOL_FREEZE); + CHECK(ZFS_IOC_BASE + 9 == ZFS_IOC_POOL_UPGRADE); + CHECK(ZFS_IOC_BASE + 10 == ZFS_IOC_POOL_GET_HISTORY); + CHECK(ZFS_IOC_BASE + 11 == ZFS_IOC_VDEV_ADD); + CHECK(ZFS_IOC_BASE + 12 == ZFS_IOC_VDEV_REMOVE); + CHECK(ZFS_IOC_BASE + 13 == ZFS_IOC_VDEV_SET_STATE); + CHECK(ZFS_IOC_BASE + 14 == ZFS_IOC_VDEV_ATTACH); + CHECK(ZFS_IOC_BASE + 15 == ZFS_IOC_VDEV_DETACH); + CHECK(ZFS_IOC_BASE + 16 == ZFS_IOC_VDEV_SETPATH); + CHECK(ZFS_IOC_BASE + 17 == ZFS_IOC_VDEV_SETFRU); + CHECK(ZFS_IOC_BASE + 18 == ZFS_IOC_OBJSET_STATS); + CHECK(ZFS_IOC_BASE + 19 == ZFS_IOC_OBJSET_ZPLPROPS); + CHECK(ZFS_IOC_BASE + 20 == ZFS_IOC_DATASET_LIST_NEXT); + CHECK(ZFS_IOC_BASE + 21 == ZFS_IOC_SNAPSHOT_LIST_NEXT); + CHECK(ZFS_IOC_BASE + 22 == ZFS_IOC_SET_PROP); + CHECK(ZFS_IOC_BASE + 23 == ZFS_IOC_CREATE); + CHECK(ZFS_IOC_BASE + 24 == ZFS_IOC_DESTROY); + CHECK(ZFS_IOC_BASE + 25 == ZFS_IOC_ROLLBACK); + CHECK(ZFS_IOC_BASE + 26 == ZFS_IOC_RENAME); + 
CHECK(ZFS_IOC_BASE + 27 == ZFS_IOC_RECV); + CHECK(ZFS_IOC_BASE + 28 == ZFS_IOC_SEND); + CHECK(ZFS_IOC_BASE + 29 == ZFS_IOC_INJECT_FAULT); + CHECK(ZFS_IOC_BASE + 30 == ZFS_IOC_CLEAR_FAULT); + CHECK(ZFS_IOC_BASE + 31 == ZFS_IOC_INJECT_LIST_NEXT); + CHECK(ZFS_IOC_BASE + 32 == ZFS_IOC_ERROR_LOG); + CHECK(ZFS_IOC_BASE + 33 == ZFS_IOC_CLEAR); + CHECK(ZFS_IOC_BASE + 34 == ZFS_IOC_PROMOTE); + CHECK(ZFS_IOC_BASE + 35 == ZFS_IOC_SNAPSHOT); + CHECK(ZFS_IOC_BASE + 36 == ZFS_IOC_DSOBJ_TO_DSNAME); + CHECK(ZFS_IOC_BASE + 37 == ZFS_IOC_OBJ_TO_PATH); + CHECK(ZFS_IOC_BASE + 38 == ZFS_IOC_POOL_SET_PROPS); + CHECK(ZFS_IOC_BASE + 39 == ZFS_IOC_POOL_GET_PROPS); + CHECK(ZFS_IOC_BASE + 40 == ZFS_IOC_SET_FSACL); + CHECK(ZFS_IOC_BASE + 41 == ZFS_IOC_GET_FSACL); + CHECK(ZFS_IOC_BASE + 42 == ZFS_IOC_SHARE); + CHECK(ZFS_IOC_BASE + 43 == ZFS_IOC_INHERIT_PROP); + CHECK(ZFS_IOC_BASE + 44 == ZFS_IOC_SMB_ACL); + CHECK(ZFS_IOC_BASE + 45 == ZFS_IOC_USERSPACE_ONE); + CHECK(ZFS_IOC_BASE + 46 == ZFS_IOC_USERSPACE_MANY); + CHECK(ZFS_IOC_BASE + 47 == ZFS_IOC_USERSPACE_UPGRADE); + CHECK(ZFS_IOC_BASE + 48 == ZFS_IOC_HOLD); + CHECK(ZFS_IOC_BASE + 49 == ZFS_IOC_RELEASE); + CHECK(ZFS_IOC_BASE + 50 == ZFS_IOC_GET_HOLDS); + CHECK(ZFS_IOC_BASE + 51 == ZFS_IOC_OBJSET_RECVD_PROPS); + CHECK(ZFS_IOC_BASE + 52 == ZFS_IOC_VDEV_SPLIT); + CHECK(ZFS_IOC_BASE + 53 == ZFS_IOC_NEXT_OBJ); + CHECK(ZFS_IOC_BASE + 54 == ZFS_IOC_DIFF); + CHECK(ZFS_IOC_BASE + 55 == ZFS_IOC_TMP_SNAPSHOT); + CHECK(ZFS_IOC_BASE + 56 == ZFS_IOC_OBJ_TO_STATS); + CHECK(ZFS_IOC_BASE + 57 == ZFS_IOC_SPACE_WRITTEN); + CHECK(ZFS_IOC_BASE + 58 == ZFS_IOC_SPACE_SNAPS); + CHECK(ZFS_IOC_BASE + 59 == ZFS_IOC_DESTROY_SNAPS); + CHECK(ZFS_IOC_BASE + 60 == ZFS_IOC_POOL_REGUID); + CHECK(ZFS_IOC_BASE + 61 == ZFS_IOC_POOL_REOPEN); + CHECK(ZFS_IOC_BASE + 62 == ZFS_IOC_SEND_PROGRESS); + CHECK(ZFS_IOC_BASE + 63 == ZFS_IOC_LOG_HISTORY); + CHECK(ZFS_IOC_BASE + 64 == ZFS_IOC_SEND_NEW); + CHECK(ZFS_IOC_BASE + 65 == ZFS_IOC_SEND_SPACE); + CHECK(ZFS_IOC_BASE + 66 == 
ZFS_IOC_CLONE); + CHECK(ZFS_IOC_BASE + 67 == ZFS_IOC_BOOKMARK); + CHECK(ZFS_IOC_BASE + 68 == ZFS_IOC_GET_BOOKMARKS); + CHECK(ZFS_IOC_BASE + 69 == ZFS_IOC_DESTROY_BOOKMARKS); + CHECK(ZFS_IOC_BASE + 70 == ZFS_IOC_RECV_NEW); + CHECK(ZFS_IOC_BASE + 71 == ZFS_IOC_POOL_SYNC); + CHECK(ZFS_IOC_BASE + 72 == ZFS_IOC_CHANNEL_PROGRAM); + CHECK(ZFS_IOC_BASE + 73 == ZFS_IOC_LOAD_KEY); + CHECK(ZFS_IOC_BASE + 74 == ZFS_IOC_UNLOAD_KEY); + CHECK(ZFS_IOC_BASE + 75 == ZFS_IOC_CHANGE_KEY); + CHECK(ZFS_IOC_BASE + 76 == ZFS_IOC_REMAP); + CHECK(ZFS_IOC_BASE + 77 == ZFS_IOC_POOL_CHECKPOINT); + CHECK(ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT); + CHECK(ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE); + CHECK(ZFS_IOC_BASE + 80 == ZFS_IOC_POOL_TRIM); + CHECK(LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT); + CHECK(LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR); + CHECK(LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK); + +#undef CHECK + + return (result); } int From 0189eb47625991621a182d9881cca6aacb6f5942 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 22 Aug 2019 12:46:09 -0400 Subject: [PATCH 179/325] Dedup IOC enum values in libzfs_input_check Reuse enum value ZFS_IOC_BASE for `('Z' << 8)`. This is helpful on FreeBSD where ZFS_IOC_BASE has a different value and `('Z' << 8)` is wrong. 
Reviewed-by: Chris Dunlop Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9188 --- tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c index 1b949c7e232..a59bededb54 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c @@ -816,8 +816,8 @@ zfs_ioc_input_tests(const char *pool) enum zfs_ioc_ref { ZFS_IOC_BASE = ('Z' << 8), - LINUX_IOC_BASE = ('Z' << 8) + 0x80, - FREEBSD_IOC_BASE = ('Z' << 8) + 0xC0, + LINUX_IOC_BASE = ZFS_IOC_BASE + 0x80, + FREEBSD_IOC_BASE = ZFS_IOC_BASE + 0xC0, }; /* From 7ed41d292b560b51d1cedec26070161b71eb6b22 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 22 Aug 2019 13:48:48 -0300 Subject: [PATCH 180/325] Document ZFS_DKMS_ENABLE_DEBUGINFO in userland configuration Document the ZFS_DKMS_ENABLE_DEBUGINFO option in the userland configuration file, as done with the other ZFS_DKMS_* options. It has been introduced with commit e45c1734a665 ("dkms: Enable debuginfo option to be set with zfs sysconfig file") but isn't mentioned anywhere other than the 'dkms.conf' file (generated). Reviewed-by: Brian Behlendorf Signed-off-by: Mauricio Faria de Oliveira Closes #9191 --- etc/init.d/zfs.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/etc/init.d/zfs.in b/etc/init.d/zfs.in index 7998569b2c2..d4ad1beaaa2 100644 --- a/etc/init.d/zfs.in +++ b/etc/init.d/zfs.in @@ -91,6 +91,10 @@ MOUNT_EXTRA_OPTIONS="" # Only applicable for Debian GNU/Linux {dkms,initramfs}. ZFS_DKMS_ENABLE_DEBUG='no' +# Build kernel modules with the --enable-debuginfo switch? +# Only applicable for Debian GNU/Linux {dkms,initramfs}. +ZFS_DKMS_ENABLE_DEBUGINFO='no' + # Keep debugging symbols in kernel modules? # Only applicable for Debian GNU/Linux {dkms,initramfs}. 
ZFS_DKMS_DISABLE_STRIP='no' From 3b47c941eb15accaed3ff0dd625fe5e28486fe12 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Thu, 22 Aug 2019 12:01:41 -0700 Subject: [PATCH 181/325] Fix install error introduced by #9089 Signed-off-by: Paul Dagnelie --- etc/systemd/system/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index ba73f558a8a..130c6c757a5 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -33,7 +33,7 @@ $(systemdunit_DATA) $(systemdpreset_DATA):%:%.in install-data-hook: $(MKDIR_P) "$(DESTDIR)$(systemdunitdir)" - ln -s /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-import.service" + ln -sf /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-import.service" distclean-local:: -$(RM) $(systemdunit_DATA) $(systemdpreset_DATA) From 9ad6f69a03cc0ab0f337415fa62dc98af2ed7db7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 22 Aug 2019 17:37:48 -0700 Subject: [PATCH 182/325] ZTS: Fix in-tree dbufstats test case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a887d653 updated the dbufstats such that escalated privileges are required. Since all tests under cli_user are run with normal privileges move this test case to a location where it will be run required privileges. 
Reviewed-by: John Kennedy Reviewed-by: Ryan Moeller Reviewed-by: Michael Niewöhner Signed-off-by: Brian Behlendorf Closes #9118 Closes #9196 --- tests/runfiles/linux.run | 4 ++-- tests/zfs-tests/tests/functional/arc/Makefile.am | 3 ++- .../misc/dbufstat_001_pos.ksh => arc/dbufstats_003_pos.ksh} | 4 ++-- tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am | 3 +-- 4 files changed, 7 insertions(+), 7 deletions(-) rename tests/zfs-tests/tests/functional/{cli_user/misc/dbufstat_001_pos.ksh => arc/dbufstats_003_pos.ksh} (90%) diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 1b81424c305..cadb4da9a7a 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -33,7 +33,7 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', tags = ['functional', 'alloc_class'] [tests/functional/arc] -tests = ['dbufstats_001_pos', 'dbufstats_002_pos'] +tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos'] tags = ['functional', 'arc'] [tests/functional/atime] @@ -503,7 +503,7 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', - 'arc_summary_001_pos', 'arc_summary_002_neg', 'dbufstat_001_pos'] + 'arc_summary_001_pos', 'arc_summary_002_neg'] user = tags = ['functional', 'cli_user', 'misc'] diff --git a/tests/zfs-tests/tests/functional/arc/Makefile.am b/tests/zfs-tests/tests/functional/arc/Makefile.am index dc57ebc8627..22704fa5181 100644 --- a/tests/zfs-tests/tests/functional/arc/Makefile.am +++ b/tests/zfs-tests/tests/functional/arc/Makefile.am @@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ setup.ksh \ dbufstats_001_pos.ksh \ - dbufstats_002_pos.ksh + dbufstats_002_pos.ksh \ + dbufstats_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/dbufstat_001_pos.ksh 
b/tests/zfs-tests/tests/functional/arc/dbufstats_003_pos.ksh similarity index 90% rename from tests/zfs-tests/tests/functional/cli_user/misc/dbufstat_001_pos.ksh rename to tests/zfs-tests/tests/functional/arc/dbufstats_003_pos.ksh index 0e187015f8d..91cec74881a 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/dbufstat_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/arc/dbufstats_003_pos.ksh @@ -33,11 +33,11 @@ log_assert "dbufstat generates output and doesn't return an error code" typeset -i i=0 while [[ $i -lt ${#args[*]} ]]; do - log_must eval "sudo dbufstat ${args[i]} > /dev/null" + log_must eval "dbufstat ${args[i]} >/dev/null" ((i = i + 1)) done # A simple test of dbufstat filter functionality -log_must eval "sudo dbufstat -F object=10,dbc=1,pool=$TESTPOOL > /dev/null" +log_must eval "dbufstat -F object=10,dbc=1,pool=$TESTPOOL >/dev/null" log_pass "dbufstat generates output and doesn't return an error code" diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am index 29c03429091..49138d927e0 100644 --- a/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am @@ -45,8 +45,7 @@ dist_pkgdata_SCRIPTS = \ zpool_upgrade_001_neg.ksh \ arcstat_001_pos.ksh \ arc_summary_001_pos.ksh \ - arc_summary_002_neg.ksh \ - dbufstat_001_pos.ksh + arc_summary_002_neg.ksh dist_pkgdata_DATA = \ misc.cfg From be6ae014352b36c491ce04da394528392428fa1a Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Sun, 25 Aug 2019 21:30:39 -0400 Subject: [PATCH 183/325] Split argument list, satisfy shellcheck SC2086 Split the arguments for ${TEST_RUNNER} across multiple lines for clarity. Also added quotes in the message to match the invoked command. Unquoted variables in argument lists are subject to splitting. In this particular case we can't quote the variable because it is an optional argument. 
Use the method suggested in the description linked below, instead. The technique is to use an unquoted variable with an alternate value. https://github.com/koalaman/shellcheck/wiki/SC2086 Reviewed-by: Brian Behlendorf Reviewed-by: Giuseppe Di Natale Reviewed-by: John Kennedy Signed-off-by: Ryan Moeller Closes #9212 --- scripts/zfs-tests.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/zfs-tests.sh b/scripts/zfs-tests.sh index 7c5286ba70f..ce766e23982 100755 --- a/scripts/zfs-tests.sh +++ b/scripts/zfs-tests.sh @@ -31,7 +31,7 @@ fi PROG=zfs-tests.sh VERBOSE="no" -QUIET= +QUIET="" CLEANUP="yes" CLEANUPALL="no" LOOPBACK="yes" @@ -307,7 +307,7 @@ while getopts 'hvqxkfScn:d:s:r:?t:T:u:I:' OPTION; do VERBOSE="yes" ;; q) - QUIET="-q" + QUIET="yes" ;; x) CLEANUPALL="yes" @@ -602,10 +602,17 @@ REPORT_FILE=$(mktemp -u -t zts-report.XXXX -p "$FILEDIR") # # Run all the tests as specified. # -msg "${TEST_RUNNER} ${QUIET} -c ${RUNFILE} -T ${TAGS} -i ${STF_SUITE}" \ - "-I ${ITERATIONS}" -${TEST_RUNNER} ${QUIET} -c "${RUNFILE}" -T "${TAGS}" -i "${STF_SUITE}" \ - -I "${ITERATIONS}" 2>&1 | tee "$RESULTS_FILE" +msg "${TEST_RUNNER} ${QUIET:+-q}" \ + "-c \"${RUNFILE}\"" \ + "-T \"${TAGS}\"" \ + "-i \"${STF_SUITE}\"" \ + "-I \"${ITERATIONS}\"" +${TEST_RUNNER} ${QUIET:+-q} \ + -c "${RUNFILE}" \ + -T "${TAGS}" \ + -i "${STF_SUITE}" \ + -I "${ITERATIONS}" \ + 2>&1 | tee "$RESULTS_FILE" # # Analyze the results. From 0c6ccc99b2484d3499a0c70a3194115e84806df3 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Sun, 25 Aug 2019 18:33:03 -0700 Subject: [PATCH 184/325] Add regression test for "zpool list -p" Other than this test, zpool list -p is not well tested by any of the automated tests. Add a test for zpool list -p. 
Reviewed-by: Prakash Surya Reviewed-by: Serapheim Dimitropoulos Reviewed-by: Brian Behlendorf Signed-off-by: Paul Dagnelie Closes #9134 --- tests/runfiles/linux.run | 2 +- .../functional/cli_root/zpool_get/Makefile.am | 5 +- .../cli_root/zpool_get/zpool_get_005_pos.ksh | 78 +++++++++++++++++++ .../cli_root/zpool_get/zpool_get_parsable.cfg | 33 ++++++++ 4 files changed, 115 insertions(+), 3 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh create mode 100644 tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index cadb4da9a7a..a5fe26dfdda 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -361,7 +361,7 @@ tags = ['functional', 'cli_root', 'zpool_export'] [tests/functional/cli_root/zpool_get] tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos', - 'zpool_get_004_neg'] + 'zpool_get_004_neg', 'zpool_get_005_pos'] tags = ['functional', 'cli_root', 'zpool_get'] [tests/functional/cli_root/zpool_history] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am index 36a7f23126a..0c87c9b3776 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am @@ -5,7 +5,8 @@ dist_pkgdata_SCRIPTS = \ zpool_get_001_pos.ksh \ zpool_get_002_pos.ksh \ zpool_get_003_pos.ksh \ - zpool_get_004_neg.ksh + zpool_get_004_neg.ksh \ + zpool_get_005_pos.ksh dist_pkgdata_DATA = \ - zpool_get.cfg + zpool_get.cfg zpool_get_parsable.cfg diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh new file mode 100755 index 00000000000..ad27d180fdb --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh @@ -0,0 
+1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2014 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg + +# +# DESCRIPTION: +# +# Zpool get returns parsable values for all known parsable properties +# +# STRATEGY: +# 1. For all parsable properties, verify zpool get -p returns a parsable value +# + +if ! is_global_zone ; then + TESTPOOL=${TESTPOOL%%/*} +fi + +typeset -i i=0 + +while [[ $i -lt "${#properties[@]}" ]]; do + log_note "Checking for parsable ${properties[$i]} property" + log_must eval "zpool get -p ${properties[$i]} $TESTPOOL >/tmp/value.$$" + grep "${properties[$i]}" /tmp/value.$$ >/dev/null 2>&1 + if [[ $? 
-ne 0 ]]; then + log_fail "${properties[$i]} not seen in output" + fi + + typeset v=$(grep "${properties[$i]}" /tmp/value.$$ | awk '{print $3}') + + log_note "${properties[$i]} has a value of $v" + + # Determine if this value is a valid number, result in return code + log_must test -n "$v" + expr $v + 0 >/dev/null 2>&1 + + # All properties must be positive integers in order to be + # parsable (i.e. a return code of 0 or 1 from expr above). + # The only exception is "expandsize", which may be "-". + if [[ ! ($? -eq 0 || $? -eq 1 || \ + ("${properties[$i]}" = "expandsize" && "$v" = "-")) ]]; then + log_fail "${properties[$i]} is not parsable" + fi + + i=$(( $i + 1 )) +done + +rm /tmp/value.$$ +log_pass "Zpool get returns parsable values for all known parsable properties" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg new file mode 100644 index 00000000000..e7b95a47223 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2014 by Delphix. All rights reserved. +# + +# Set the expected properties of zpool +typeset -a properties=("allocated" "capacity" "expandsize" "free" "freeing" + "leaked" "size") From f785ce65c12e81f27c7c69db3e8a85a71c9b2709 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Wed, 28 Aug 2019 13:38:40 -0400 Subject: [PATCH 185/325] Prefer `for (;;)` to `while (TRUE)` Defining a special constant to make an infinite loop is excessive, especially when the name clashes with symbols commonly defined on some platforms (ie FreeBSD). Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: John Kennedy Closes #9219 --- .../cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c b/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c index 7986851efae..e262ecefea9 100644 --- a/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c +++ b/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c @@ -47,7 +47,6 @@ #include #include -static const int TRUE = 1; static char *filebase; static int @@ -65,7 +64,7 @@ mover(void *a) len = strlen(filebase) + 5; - while (TRUE) { + for (;;) { idx = pickidx(); (void) snprintf(buf, len, "%s.%03d", filebase, idx); ret = rename(filebase, buf); @@ -85,7 +84,7 @@ cleaner(void *a) len = strlen(filebase) + 5; - while (TRUE) { + for (;;) { idx = pickidx(); (void) snprintf(buf, len, "%s.%03d", filebase, idx); ret = remove(buf); @@ -102,7 +101,7 @@ writer(void *a) int *fd = (int *)a; int ret; - while (TRUE) { + for (;;) { if (*fd != -1) (void) close (*fd); @@ -143,7 +142,7 @@ main(int argc, char **argv) (void) pthread_create(&tid, NULL, cleaner, NULL); (void) pthread_create(&tid, NULL, writer, (void *) &fd); - while (TRUE) { + for (;;) { int ret; struct stat st; From 16f42e1b6d4c4b43380fe22edfd265d5c0f5edcb Mon Sep 17 00:00:00 
2001 From: Tony Nguyen Date: Wed, 28 Aug 2019 15:56:54 -0600 Subject: [PATCH 186/325] Use smaller default slack/delta value for schedule_hrtimeout_range() For interrupt coalescing, cv_timedwait_hires() uses a 100us slack/delta for calls to schedule_hrtimeout_range(). This 100us slack can be costly for small writes. This change improves small write performance by passing resolution `res` parameter to schedule_hrtimeout_range() to be used as delta/slack. A new tunable `spl_schedule_hrtimeout_slack_us` is added to preserve old behavior when desired. Performance observations on 8K recordsize filesystem: - 8K random writes at 1-64 threads, up to 60% improvement for one thread and smaller gains as thread count increases. At >64 threads, 2-5% decrease in performance was observed. - 8K sequential writes, similar 60% improvement for one thread and leveling out around 64 threads. At >64 threads, 5-10% decrease in performance was observed. - 128K sequential write sees 1-5 for the 128K. No observed regression at high thread count. Testing done on Ubuntu 18.04 with 4.15 kernel, 8vCPUs and SSD storage on VMware ESX. 
Reviewed-by: Richard Elling Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Signed-off-by: Tony Nguyen Closes #9217 --- module/spl/spl-condvar.c | 54 +++++++++++++++++++++++++++------------- module/zfs/mmp.c | 2 +- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 19c575f770b..664fae1e719 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -26,8 +26,10 @@ #include #include +#include #include #include +#include #include @@ -35,6 +37,34 @@ #include #endif +#define MAX_HRTIMEOUT_SLACK_US 1000 +unsigned int spl_schedule_hrtimeout_slack_us = 0; + +static int +param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp) +{ + unsigned long val; + int error; + + error = kstrtoul(buf, 0, &val); + if (error) + return (error); + + if (val > MAX_HRTIMEOUT_SLACK_US) + return (-EINVAL); + + error = param_set_uint(buf, kp); + if (error < 0) + return (error); + + return (0); +} + +module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack, + param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644); +MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us, + "schedule_hrtimeout_range() delta/slack value in us, default(0)"); + void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) { @@ -304,12 +334,13 @@ EXPORT_SYMBOL(__cv_timedwait_sig); */ static clock_t __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, - int state) + hrtime_t res, int state) { DEFINE_WAIT(wait); kmutex_t *m; hrtime_t time_left; ktime_t ktime_left; + u64 slack = 0; ASSERT(cvp); ASSERT(mp); @@ -336,13 +367,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, * race where 'cvp->cv_waiters > 0' but the list is empty. 
*/ mutex_exit(mp); - /* - * Allow a 100 us range to give kernel an opportunity to coalesce - * interrupts - */ + ktime_left = ktime_set(0, time_left); - schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC, - HRTIMER_MODE_REL); + slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC), + MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC); + schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { @@ -369,19 +398,10 @@ static clock_t cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag, int state) { - if (res > 1) { - /* - * Align expiration to the specified resolution. - */ - if (flag & CALLOUT_FLAG_ROUNDUP) - tim += res - 1; - tim = (tim / res) * res; - } - if (!(flag & CALLOUT_FLAG_ABSOLUTE)) tim += gethrtime(); - return (__cv_timedwait_hires(cvp, mp, tim, state)); + return (__cv_timedwait_hires(cvp, mp, tim, res, state)); } clock_t diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index cd5603a1a5c..1ffd862da12 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -672,7 +672,7 @@ mmp_thread(void *arg) CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv, - &mmp->mmp_thread_lock, next_time, USEC2NSEC(1), + &mmp->mmp_thread_lock, next_time, USEC2NSEC(100), CALLOUT_FLAG_ABSOLUTE); CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock); } From 2f1f18a6b41b078fac80257c00ba4385b60a63a3 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 29 Aug 2019 14:03:09 -0400 Subject: [PATCH 187/325] Use compatible arg order in tests BSD getopt() and getopt_long() want options before arguments. Reorder arguments to zfs/zpool in tests to put all the options first. 
Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9228 --- .../functional/alloc_class/alloc_class_004_pos.ksh | 2 +- .../functional/alloc_class/alloc_class_005_pos.ksh | 4 ++-- .../cli_root/zfs_program/zfs_program_json.ksh | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh index dcc6f7607c9..79ac9364c25 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh @@ -52,7 +52,7 @@ do log_must zpool create $TESTPOOL $type $ZPOOL_DISKS \ special $stype $sdisks - ac_value="$(zpool get all -H -o property,value | \ + ac_value="$(zpool get -H -o property,value all | \ egrep allocation_classes | nawk '{print $2}')" if [ "$ac_value" = "active" ]; then log_note "feature@allocation_classes is active" diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh index 417c68aa739..337114cdb59 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh @@ -41,7 +41,7 @@ do else log_must zpool create $TESTPOOL $type $ZPOOL_DISKS fi - ac_value="$(zpool get all -H -o property,value | \ + ac_value="$(zpool get -H -o property,value all | \ egrep allocation_classes | awk '{print $2}')" if [ "$ac_value" = "enabled" ]; then log_note "feature@allocation_classes is enabled" @@ -56,7 +56,7 @@ do log_must zpool add $TESTPOOL special mirror \ $CLASS_DISK0 $CLASS_DISK1 fi - ac_value="$(zpool get all -H -o property,value | \ + ac_value="$(zpool get -H -o property,value all | \ egrep allocation_classes | awk '{print $2}')" if [ "$ac_value" = "active" ]; then log_note "feature@allocation_classes is active" 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh index 1d769096b4f..3d59f784a48 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh @@ -95,10 +95,10 @@ typeset -i cnt=0 typeset cmd for cmd in ${pos_cmds[@]}; do log_must zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 - log_must zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1 + log_must zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 # json.tool is needed to guarantee consistent ordering of fields # sed is needed to trim trailing space in CentOS 6's json.tool output - OUTPUT=$(zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1 | python -m json.tool | sed 's/[[:space:]]*$//') + OUTPUT=$(zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 | python -m json.tool | sed 's/[[:space:]]*$//') if [ "$OUTPUT" != "${pos_cmds_out[$cnt]}" ]; then log_note "Got :$OUTPUT" log_note "Expected:${pos_cmds_out[$cnt]}" @@ -120,9 +120,9 @@ For the property list, run: zfs set|get For the delegated permission list, run: zfs allow|unallow") cnt=0 for cmd in ${neg_cmds[@]}; do - log_mustnot zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 - log_mustnot zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1 - OUTPUT=$(zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1) + log_mustnot zfs program $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1 + log_mustnot zfs program -j $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1 + OUTPUT=$(zfs program -j $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1) if [ "$OUTPUT" != "${neg_cmds_out[$cnt]}" ]; then log_note "Got :$OUTPUT" log_note "Expected:${neg_cmds_out[$cnt]}" From 0302546b8dbbd0280a585a989327a6c995861b38 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 29 Aug 2019 16:11:29 -0400 Subject: [PATCH 188/325] Simplify deleting partitions in libtest Eliminate unnecessary code duplication. 
We can use a for-loop instead of a while-loop. There is no need to echo $DISKSARRAY in a subshell or return 0. Declare all variables with typeset. Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Ryan Moeller Closes #9224 --- tests/zfs-tests/include/libtest.shlib | 55 +++++---------------------- 1 file changed, 10 insertions(+), 45 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index c7cb36a8d0e..10949760081 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -953,61 +953,26 @@ function set_partition # function delete_partitions { - typeset -i j=1 + typeset disk - if [[ -z $DISK_ARRAY_NUM ]]; then - DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}') - fi if [[ -z $DISKSARRAY ]]; then DISKSARRAY=$DISKS fi if is_linux; then - if (( $DISK_ARRAY_NUM == 1 )); then - while ((j < MAX_PARTITIONS)); do - parted $DEV_DSKDIR/$DISK -s rm $j \ - > /dev/null 2>&1 - if (( $? == 1 )); then - lsblk | egrep ${DISK}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 1 )); then - log_note "Partitions for $DISK should be deleted" - else - log_fail "Partition for ${DISK}${SLICE_PREFIX}${j} not deleted" - fi - return 0 + typeset -i part + for disk in $DISKSARRAY; do + for (( part = 1; part < MAX_PARTITIONS; part++ )); do + typeset partition=${disk}${SLICE_PREFIX}${part} + parted $DEV_DSKDIR/$disk -s rm $part > /dev/null 2>&1 + if lsblk | grep -qF ${partition}; then + log_fail "Partition ${partition} not deleted" else - lsblk | egrep ${DISK}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 0 )); then - log_fail "Partition for ${DISK}${SLICE_PREFIX}${j} not deleted" - fi + log_note "Partition ${partition} deleted" fi - ((j = j+1)) - done - else - for disk in `echo $DISKSARRAY`; do - while ((j < MAX_PARTITIONS)); do - parted $DEV_DSKDIR/$disk -s rm $j > /dev/null 2>&1 - if (( $? == 1 )); then - lsblk | egrep ${disk}${SLICE_PREFIX}${j} > /dev/null - if (( $? 
== 1 )); then - log_note "Partitions for $disk should be deleted" - else - log_fail "Partition for ${disk}${SLICE_PREFIX}${j} not deleted" - fi - j=7 - else - lsblk | egrep ${disk}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 0 )); then - log_fail "Partition for ${disk}${SLICE_PREFIX}${j} not deleted" - fi - fi - ((j = j+1)) - done - j=1 done - fi + done fi - return 0 } # From ebdb7705549f608200e74ed040fe1829b8bea4ab Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 30 Aug 2019 09:28:31 -0700 Subject: [PATCH 189/325] Prevent metaslab_sync panic due to spa_final_dirty_txg If a pool enables the SPACEMAP_HISTOGRAM feature shortly before being exported, we can enter a situation that causes a kernel panic. Any metaslabs that are loaded during the final dirty txg and haven't already been condensed will cause metaslab_sync to proceed after the final dirty txg so that the condense can be performed, which there are assertions to prevent. Because of the nature of this issue, there are a number of ways we can enter this state. Rather than try to prevent each of them one by one, potentially missing some edge cases, we instead cut it off at the point of intersection; by preventing metaslab_sync from proceeding if it would only do so to perform a condense and we're past the final dirty txg, we preserve the utility of the existing asserts while preventing this particular issue. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Paul Dagnelie Closes #9185 Closes #9186 Closes #9231 Closes #9253 --- module/zfs/metaslab.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 5da929b4843..faa175b7e7a 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -2832,12 +2832,19 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) /* * Normally, we don't want to process a metaslab if there are no * allocations or frees to perform. 
However, if the metaslab is being - * forced to condense and it's loaded, we need to let it through. + * forced to condense, it's loaded and we're not beyond the final + * dirty txg, we need to let it through. Not condensing beyond the + * final dirty txg prevents an issue where metaslabs that need to be + * condensed but were loaded for other reasons could cause a panic + * here. By only checking the txg in that branch of the conditional, + * we preserve the utility of the VERIFY statements in all other + * cases. */ if (range_tree_is_empty(alloctree) && range_tree_is_empty(msp->ms_freeing) && range_tree_is_empty(msp->ms_checkpointing) && - !(msp->ms_loaded && msp->ms_condense_wanted)) + !(msp->ms_loaded && msp->ms_condense_wanted && + txg <= spa_final_dirty_txg(spa))) return; From 619fda527a8149ca020b8baa75fe4ffe099e6d9f Mon Sep 17 00:00:00 2001 From: Igor K Date: Fri, 30 Aug 2019 19:32:25 +0300 Subject: [PATCH 190/325] Fix refquota_007_neg.ksh Must use 'zfs' instead of '$ZFS' which is undefined. 
Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Igor Kozhukhov Closes #9257 --- .../tests/functional/refquota/refquota_007_neg.ksh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh b/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh index e2141c7d7f3..4f0393883b6 100755 --- a/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh +++ b/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh @@ -38,9 +38,9 @@ verify_runnable "both" function cleanup { - log_must $ZFS destroy -rf $TESTPOOL/$TESTFS - log_must $ZFS create $TESTPOOL/$TESTFS - log_must $ZFS set mountpoint=$TESTDIR $TESTPOOL/$TESTFS + log_must zfs destroy -rf $TESTPOOL/$TESTFS + log_must zfs create $TESTPOOL/$TESTFS + log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS } log_onexit cleanup From 35c8730d1a085c937891c2c6b32728b8a2384b39 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 18:40:30 +0200 Subject: [PATCH 191/325] Fix typos in config/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9232 --- config/ax_code_coverage.m4 | 2 +- config/kernel-dentry-operations.m4 | 2 +- config/kernel-mkdir-umode-t.m4 | 2 +- config/kernel-timer.m4 | 2 +- config/lib-link.m4 | 2 +- config/pkg.m4 | 2 +- config/user.m4 | 2 +- config/zfs-build.m4 | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/config/ax_code_coverage.m4 b/config/ax_code_coverage.m4 index 4417d4444a9..5cdfe14562a 100644 --- a/config/ax_code_coverage.m4 +++ b/config/ax_code_coverage.m4 @@ -50,7 +50,7 @@ # CODE_COVERAGE_LIBS is preferred for clarity; CODE_COVERAGE_LDFLAGS is # deprecated. They have the same value. # -# This code was derived from Makefile.decl in GLib, originally licenced +# This code was derived from Makefile.decl in GLib, originally licensed # under LGPLv2.1+. 
# # LICENSE diff --git a/config/kernel-dentry-operations.m4 b/config/kernel-dentry-operations.m4 index 61f5a27af5a..2cd2553010d 100644 --- a/config/kernel-dentry-operations.m4 +++ b/config/kernel-dentry-operations.m4 @@ -69,7 +69,7 @@ AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], ]) dnl # -dnl # 2.6.38 API chage +dnl # 2.6.38 API change dnl # Added sb->s_d_op default dentry_operations member dnl # AC_DEFUN([ZFS_AC_KERNEL_S_D_OP], diff --git a/config/kernel-mkdir-umode-t.m4 b/config/kernel-mkdir-umode-t.m4 index ebc21be9ec5..1c9fa9be3ce 100644 --- a/config/kernel-mkdir-umode-t.m4 +++ b/config/kernel-mkdir-umode-t.m4 @@ -4,7 +4,7 @@ dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a dnl # umode_t type rather than an int. The expectation is that any backport dnl # would also change all three prototypes. However, if it turns out that dnl # some distribution doesn't backport the whole thing this could be -dnl # broken apart in to three separate checks. +dnl # broken apart into three separate checks. dnl # AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t]) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index b0e1afa153a..d9064204307 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -6,7 +6,7 @@ dnl # (older kernels). Also sanity check the from_timer() and timer_setup() dnl # macros are available as well, since they will be used in the same newer dnl # kernels that support the new timer_list.func signature. dnl # -dnl # Also check for the existance of flags in struct timer_list, they were +dnl # Also check for the existence of flags in struct timer_list, they were dnl # added in 4.1-rc8 via 0eeda71bc30d. 
AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ diff --git a/config/lib-link.m4 b/config/lib-link.m4 index 0ff10731fac..01766c315c9 100644 --- a/config/lib-link.m4 +++ b/config/lib-link.m4 @@ -216,7 +216,7 @@ AC_DEFUN([AC_LIB_LINKFLAGS_BODY], fi ]) dnl Search the library and its dependencies in $additional_libdir and - dnl $LDFLAGS. Using breadth-first-seach. + dnl $LDFLAGS. Using breadth-first-search. LIB[]NAME= LTLIB[]NAME= INC[]NAME= diff --git a/config/pkg.m4 b/config/pkg.m4 index 13a88901786..f9075e56c87 100644 --- a/config/pkg.m4 +++ b/config/pkg.m4 @@ -86,7 +86,7 @@ dnl Check to see whether a particular set of modules exists. Similar to dnl PKG_CHECK_MODULES(), but does not set variables or print errors. dnl dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -dnl only at the first occurence in configure.ac, so if the first place +dnl only at the first occurrence in configure.ac, so if the first place dnl it's called might be skipped (such as if it is within an "if", you dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], diff --git a/config/user.m4 b/config/user.m4 index 1ee9dbe263b..3d97e9a418c 100644 --- a/config/user.m4 +++ b/config/user.m4 @@ -27,7 +27,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [ dnl # dnl # Setup the environment for the ZFS Test Suite. Currently only -dnl # Linux sytle systems are supported but this infrastructure can +dnl # Linux style systems are supported but this infrastructure can dnl # be extended to support other platforms if needed. 
dnl # AC_DEFUN([ZFS_AC_TEST_FRAMEWORK], [ diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index 8e221f2d7d4..c2e5bb25fe2 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -461,7 +461,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_RESULT([$DEFAULT_INIT_SCRIPT]) AC_SUBST(DEFAULT_INIT_SCRIPT) - AC_MSG_CHECKING([default init config direectory]) + AC_MSG_CHECKING([default init config directory]) case "$VENDOR" in alpine) DEFAULT_INITCONF_DIR=/etc/conf.d ;; gentoo) DEFAULT_INITCONF_DIR=/etc/conf.d ;; From cac5f924ce88529f2c0a0b592ad33470ef544679 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 18:41:35 +0200 Subject: [PATCH 192/325] Fix typos in man/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9233 --- man/man1/cstyle.1 | 2 +- man/man1/ztest.1 | 2 +- man/man5/vdev_id.conf.5 | 2 +- man/man5/zfs-events.5 | 4 ++-- man/man5/zfs-module-parameters.5 | 4 ++-- man/man8/zdb.8 | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/man/man1/cstyle.1 b/man/man1/cstyle.1 index f2b637d4c36..f77d534507a 100644 --- a/man/man1/cstyle.1 +++ b/man/man1/cstyle.1 @@ -31,7 +31,7 @@ .IX "OS-Net build tools" "cstyle" "" "\fBcstyle\fP" .LP .I cstyle -inspects C source files (*.c and *.h) for common sylistic errors. It +inspects C source files (*.c and *.h) for common stylistic errors. It attempts to check for the cstyle documented in \fIhttp://www.cis.upenn.edu/~lee/06cse480/data/cstyle.ms.pdf\fP. Note that there is much in that document that diff --git a/man/man1/ztest.1 b/man/man1/ztest.1 index b8cb0d45d92..84e56c822d1 100644 --- a/man/man1/ztest.1 +++ b/man/man1/ztest.1 @@ -175,5 +175,5 @@ By default the stack size is limited to 256K. 
.BR "zfs (1)" "," .BR "zdb (1)" "," .SH "AUTHOR" -This manual page was transvered to asciidoc by Michael Gebetsroither +This manual page was transferred to asciidoc by Michael Gebetsroither from http://opensolaris.org/os/community/zfs/ztest/ diff --git a/man/man5/vdev_id.conf.5 b/man/man5/vdev_id.conf.5 index 5b7fbf0cad4..89c5ee96109 100644 --- a/man/man5/vdev_id.conf.5 +++ b/man/man5/vdev_id.conf.5 @@ -41,7 +41,7 @@ disk enclosure). .TP \fIenclosure_symlinks\fR Additionally create /dev/by-enclosure symlinks to the disk enclosure -sg devices using the naming scheme from from vdev_id.conf. +sg devices using the naming scheme from vdev_id.conf. \fIenclosure_symlinks\fR is only allowed for sas_direct mode. .TP \fIenclosure_symlinks_prefix\fR diff --git a/man/man5/zfs-events.5 b/man/man5/zfs-events.5 index 7e9bbedafda..4a28be71e68 100644 --- a/man/man5/zfs-events.5 +++ b/man/man5/zfs-events.5 @@ -557,7 +557,7 @@ How many write errors that have been detected on the vdev. \fBvdev_cksum_errors\fR .ad .RS 12n -How many checkum errors that have been detected on the vdev. +How many checksum errors that have been detected on the vdev. .RE .sp @@ -858,7 +858,7 @@ such as IDE or parallel SCSI. .RS 12n If this field exists, it is an array of counters. Each entry counts bit clears in a particular bit of a big-endian uint64 type. The first entry counts bits -clears of the the high-order bit of the first byte, the 9th byte, etc, and the +clears of the high-order bit of the first byte, the 9th byte, etc, and the last entry counts clears of the low-order bit of the 8th byte, the 16th byte, etc. This information is useful for observing a stuck bit in a parallel data path, such as IDE or parallel SCSI. diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 3395175d6eb..83d34025ab8 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -855,7 +855,7 @@ Default value: \fB0\fR. 
.ad .RS 12n Minimum time "prescient prefetched" blocks are locked in the ARC, specified -in ms. These blocks are meant to be prefetched fairly aggresively ahead of +in ms. These blocks are meant to be prefetched fairly aggressively ahead of the code that may use them. A value of \fB0\fR will default to 6000 ms. .sp Default value: \fB0\fR. @@ -2391,7 +2391,7 @@ Default value: \fB20\fR which is 5% of RAM (1/20). .ad .RS 12n The fraction of the hard limit used to determined the soft limit for I/O sorting -by the sequential scan algorithm. When we cross this limit from bellow no action +by the sequential scan algorithm. When we cross this limit from below no action is taken. When we cross this limit from above it is because we are issuing verification I/O. In this case (unless the metadata scan is done) we stop issuing verification I/O and start scanning metadata again until we get to the diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index 57403cba74a..c28cf12baee 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -250,7 +250,7 @@ and, optionally, Print block pointer .It Sy d Decompress the block. Set environment variable -.Nm ZBD_NO_ZLE +.Nm ZDB_NO_ZLE to skip zle when guessing. 
.It Sy e Byte swap the block From eaf8e3b77932147cc138efc3d388442f0114f06d Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 18:43:30 +0200 Subject: [PATCH 193/325] Fix typos in cmd/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9234 --- cmd/arc_summary/arc_summary3 | 14 +++++++------- cmd/arcstat/Makefile.am | 2 +- cmd/dbufstat/Makefile.am | 2 +- cmd/fsck_zfs/fsck.zfs | 2 +- cmd/vdev_id/vdev_id | 2 +- cmd/zdb/zdb.c | 2 +- cmd/zed/agents/fmd_api.c | 2 +- cmd/zed/agents/fmd_serd.c | 2 +- cmd/zed/agents/zfs_mod.c | 6 +++--- cmd/zed/zed.d/statechange-led.sh | 4 ++-- cmd/zfs/zfs_main.c | 2 +- cmd/zinject/translate.c | 4 ++-- cmd/zpool/zpool_vdev.c | 4 ++-- cmd/zstreamdump/zstreamdump.c | 2 +- cmd/ztest/ztest.c | 2 +- 15 files changed, 26 insertions(+), 26 deletions(-) diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 index fc5e1e4b64c..d3327143849 100755 --- a/cmd/arc_summary/arc_summary3 +++ b/cmd/arc_summary/arc_summary3 @@ -43,7 +43,7 @@ import subprocess import sys import time -DECRIPTION = 'Print ARC and other statistics for ZFS on Linux' +DESCRIPTION = 'Print ARC and other statistics for ZFS on Linux' INDENT = ' '*8 LINE_LENGTH = 72 PROC_PATH = '/proc/spl/kstat/zfs/' @@ -65,7 +65,7 @@ SECTION_PATHS = {'arc': 'arcstats', 'zfetch': 'zfetchstats', 'zil': 'zil'} -parser = argparse.ArgumentParser(description=DECRIPTION) +parser = argparse.ArgumentParser(description=DESCRIPTION) parser.add_argument('-a', '--alternate', action='store_true', default=False, help='use alternate formatting for tunables and SPL', dest='alt') @@ -284,7 +284,7 @@ def get_spl_tunables(PATH): def get_descriptions(request): - """Get the decriptions of the Solaris Porting Layer (SPL) or the + """Get the descriptions of the Solaris Porting Layer (SPL) or the tunables, return with minimal formatting. 
""" @@ -708,7 +708,7 @@ def section_l2arc(kstats_dict): def section_spl(*_): """Print the SPL parameters, if requested with alternative format - and/or decriptions. This does not use kstats. + and/or descriptions. This does not use kstats. """ spls = get_spl_tunables(SPL_PATH) @@ -725,7 +725,7 @@ def section_spl(*_): try: print(INDENT+'#', descriptions[key]) except KeyError: - print(INDENT+'# (No decription found)') # paranoid + print(INDENT+'# (No description found)') # paranoid print(format_raw_line(key, value)) @@ -734,7 +734,7 @@ def section_spl(*_): def section_tunables(*_): """Print the tunables, if requested with alternative format and/or - decriptions. This does not use kstasts. + descriptions. This does not use kstasts. """ tunables = get_spl_tunables(TUNABLES_PATH) @@ -751,7 +751,7 @@ def section_tunables(*_): try: print(INDENT+'#', descriptions[key]) except KeyError: - print(INDENT+'# (No decription found)') # paranoid + print(INDENT+'# (No description found)') # paranoid print(format_raw_line(key, value)) diff --git a/cmd/arcstat/Makefile.am b/cmd/arcstat/Makefile.am index 462e9a6197a..2d59faa9c87 100644 --- a/cmd/arcstat/Makefile.am +++ b/cmd/arcstat/Makefile.am @@ -1,7 +1,7 @@ dist_bin_SCRIPTS = arcstat # -# The arcstat script is compatibile with both Python 2.6 and 3.4. +# The arcstat script is compatible with both Python 2.6 and 3.4. # As such the python 3 shebang can be replaced at install time when # targeting a python 2 system. This allows us to maintain a single # version of the source. diff --git a/cmd/dbufstat/Makefile.am b/cmd/dbufstat/Makefile.am index 968a7607797..06923d38b2e 100644 --- a/cmd/dbufstat/Makefile.am +++ b/cmd/dbufstat/Makefile.am @@ -1,7 +1,7 @@ dist_bin_SCRIPTS = dbufstat # -# The dbufstat script is compatibile with both Python 2.6 and 3.4. +# The dbufstat script is compatible with both Python 2.6 and 3.4. # As such the python 3 shebang can be replaced at install time when # targeting a python 2 system. 
This allows us to maintain a single # version of the source. diff --git a/cmd/fsck_zfs/fsck.zfs b/cmd/fsck_zfs/fsck.zfs index f1685db6527..129a7f39c38 100755 --- a/cmd/fsck_zfs/fsck.zfs +++ b/cmd/fsck_zfs/fsck.zfs @@ -1,6 +1,6 @@ #!/bin/sh # -# fsck.zfs: A fsck helper to accomidate distributions that expect +# fsck.zfs: A fsck helper to accommodate distributions that expect # to be able to execute a fsck on all filesystem types. Currently # this script does nothing but it could be extended to act as a # compatibility wrapper for 'zpool scrub'. diff --git a/cmd/vdev_id/vdev_id b/cmd/vdev_id/vdev_id index 3796ab4885d..a79aed3b5d8 100755 --- a/cmd/vdev_id/vdev_id +++ b/cmd/vdev_id/vdev_id @@ -102,7 +102,7 @@ Usage: vdev_id [-h] vdev_id <-d device> [-c config_file] [-p phys_per_port] [-g sas_direct|sas_switch|scsi] [-m] - -c specify name of alernate config file [default=$CONFIG] + -c specify name of an alternative config file [default=$CONFIG] -d specify basename of device (i.e. sda) -e Create enclose device symlinks only (/dev/by-enclosure) -g Storage network topology [default="$TOPOLOGY"] diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 3e0e0575373..9744849083a 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -4779,7 +4779,7 @@ zdb_set_skip_mmp(char *target) * the name of the target pool. * * Note that the checkpointed state's pool name will be the name of - * the original pool with the above suffix appened to it. In addition, + * the original pool with the above suffix appended to it. In addition, * if the target is not a pool name (e.g. a path to a dataset) then * the new_path parameter is populated with the updated path to * reflect the fact that we are looking into the checkpointed state. 
diff --git a/cmd/zed/agents/fmd_api.c b/cmd/zed/agents/fmd_api.c index ae90a322cf9..607b387ca3a 100644 --- a/cmd/zed/agents/fmd_api.c +++ b/cmd/zed/agents/fmd_api.c @@ -25,7 +25,7 @@ */ /* - * This file imlements the minimal FMD module API required to support the + * This file implements the minimal FMD module API required to support the * fault logic modules in ZED. This support includes module registration, * memory allocation, module property accessors, basic case management, * one-shot timers and SERD engines. diff --git a/cmd/zed/agents/fmd_serd.c b/cmd/zed/agents/fmd_serd.c index 043552862e8..d4ec37fb769 100644 --- a/cmd/zed/agents/fmd_serd.c +++ b/cmd/zed/agents/fmd_serd.c @@ -281,7 +281,7 @@ fmd_serd_eng_empty(fmd_serd_eng_t *sgp) void fmd_serd_eng_reset(fmd_serd_eng_t *sgp) { - serd_log_msg(" SERD Engine: reseting %s", sgp->sg_name); + serd_log_msg(" SERD Engine: resetting %s", sgp->sg_name); while (sgp->sg_count != 0) fmd_serd_eng_discard(sgp, list_head(&sgp->sg_list)); diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 6d3e7cb1125..d980794d0a5 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -157,7 +157,7 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data) * 1. physical match with no fs, no partition * tag it top, partition disk * - * 2. physical match again, see partion and tag + * 2. physical match again, see partition and tag * */ @@ -674,7 +674,7 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi) devid, devpath ? devpath : "NULL", is_slice); /* - * Iterate over all vdevs looking for a match in the folllowing order: + * Iterate over all vdevs looking for a match in the following order: * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk) * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location). 
* @@ -892,7 +892,7 @@ zfs_enum_pools(void *arg) * * sent messages from zevents or udev monitor * - * For now, each agent has it's own libzfs instance + * For now, each agent has its own libzfs instance */ int zfs_slm_init() diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh index 6484b79592a..e656e125d37 100755 --- a/cmd/zed/zed.d/statechange-led.sh +++ b/cmd/zed/zed.d/statechange-led.sh @@ -20,7 +20,7 @@ # # Exit codes: # 0: enclosure led successfully set -# 1: enclosure leds not not available +# 1: enclosure leds not available # 2: enclosure leds administratively disabled # 3: The led sysfs path passed from ZFS does not exist # 4: $ZPOOL not set @@ -68,7 +68,7 @@ check_and_set_led() # timeout. for _ in $(seq 1 5); do # We want to check the current state first, since writing to the - # 'fault' entry always always causes a SES command, even if the + # 'fault' entry always causes a SES command, even if the # current state is already what you want. current=$(cat "${file}") diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 224a004d88d..3dd2b388690 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -7918,7 +7918,7 @@ zfs_do_change_key(int argc, char **argv) * 4) zfs project [-p id] [-r] [-s] * Set project ID and/or inherit flag on the file(s) or directories. * -p: Set the project ID as the given id. - * -r: Set on subdirectorie recursively. If not specify "-p" option, + * -r: Set on subdirectories recursively. If not specify "-p" option, * it will use top-level directory's project ID as the given id, * then set both project ID and inherit flag on all descendants * of the top-level directory. diff --git a/cmd/zinject/translate.c b/cmd/zinject/translate.c index 700961b06a3..d4795d07110 100644 --- a/cmd/zinject/translate.c +++ b/cmd/zinject/translate.c @@ -176,7 +176,7 @@ object_from_path(const char *dataset, uint64_t object, zinject_record_t *record) } /* - * Intialize the range based on the type, level, and range given. 
+ * Initialize the range based on the type, level, and range given. */ static int initialize_range(err_type_t type, int level, char *range, @@ -310,7 +310,7 @@ translate_record(err_type_t type, const char *object, const char *range, ziprintf("raw object: %llu\n", record->zi_object); /* - * For the given object, intialize the range in bytes + * For the given object, initialize the range in bytes */ if (initialize_range(type, level, (char *)range, record) != 0) goto err; diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 52c696816f7..ef2a30996e5 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -438,7 +438,7 @@ check_disk(const char *path, blkid_cache cache, int force, } /* - * Expected to fail for non-EFI labled disks. Just check the device + * Expected to fail for non-EFI labeled disks. Just check the device * as given and do not attempt to detect and scan partitions. */ err = efi_alloc_and_read(fd, &vtoc); @@ -1867,7 +1867,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, } /* - * Validate each device to make sure that its not shared with another + * Validate each device to make sure that it's not shared with another * subsystem. We do this even if 'force' is set, because there are some * uses (such as a dedicated dump device) that even '-f' cannot * override. diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index a65b4cef3d3..6b960c20f70 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -197,7 +197,7 @@ print_block(char *buf, int length) } /* - * Print an array of bytes to stdout as hexidecimal characters. str must + * Print an array of bytes to stdout as hexadecimal characters. str must * have buf_len * 2 + 1 bytes of space. 
*/ static void diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 49833a42393..e83654a3262 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -74,7 +74,7 @@ * * To turn this into an overnight stress test, use -T to specify run time. * - * You can ask more more vdevs [-v], datasets [-d], or threads [-t] + * You can ask more vdevs [-v], datasets [-d], or threads [-t] * to increase the pool capacity, fanout, and overall stress level. * * Use the -k option to set the desired frequency of kills. From 44ae857ca403250511b1f16d601e0bf6e105aa48 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 18:44:43 +0200 Subject: [PATCH 194/325] Fix typos in contrib/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9235 --- contrib/initramfs/scripts/zfs.in | 12 ++++++------ contrib/pyzfs/docs/source/conf.py | 2 +- contrib/pyzfs/libzfs_core/_libzfs_core.py | 14 +++++++------- contrib/pyzfs/libzfs_core/_nvlist.py | 2 +- contrib/pyzfs/libzfs_core/exceptions.py | 4 ++-- contrib/pyzfs/libzfs_core/test/test_libzfs_core.py | 8 ++++---- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 9e90d76bb11..c82b210d7e9 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -78,7 +78,7 @@ find_rootfs() { local pool="$1" - # If 'POOL_IMPORTED' isn't set, no pool imported and therefor + # If 'POOL_IMPORTED' isn't set, no pool imported and therefore # we won't be able to find a root fs. [ -z "${POOL_IMPORTED}" ] && return 1 @@ -135,7 +135,7 @@ get_pools() # Get the base list of available pools. available_pools=$(find_pools "$ZPOOL" import) - # Just in case - seen it happen (that a pool isn't visable/found + # Just in case - seen it happen (that a pool isn't visible/found # with a simple "zpool import" but only when using the "-d" # option or setting ZPOOL_IMPORT_PATH). 
if [ -d "/dev/disk/by-id" ] @@ -401,7 +401,7 @@ mount_fs() return 0 } -# Unlock a ZFS native crypted filesystem. +# Unlock a ZFS native encrypted filesystem. decrypt_fs() { local fs="$1" @@ -606,7 +606,7 @@ setup_snapshot_booting() if ! grep -qiE '(^|[^\\](\\\\)* )(rollback)=(on|yes|1)( |$)' /proc/cmdline then # If the destination dataset for the clone - # already exists, destroy it. Recursivly + # already exists, destroy it. Recursively if [ $(get_fs_value "${rootfs}_${snapname}" type) ]; then filesystems=$("${ZFS}" list -oname -tfilesystem -H \ -r -Sname "${ZFS_BOOTFS}") @@ -616,7 +616,7 @@ setup_snapshot_booting() fi fi - # Get all snapshots, recursivly (might need to clone /usr, /var etc + # Get all snapshots, recursively (might need to clone /usr, /var etc # as well). for s in $("${ZFS}" list -H -oname -tsnapshot -r "${rootfs}" | \ grep "${snapname}") @@ -843,7 +843,7 @@ mountroot() # Strip 'zfs:' and 'ZFS='. ZFS_BOOTFS="${ROOT#*[:=]}" - # Stip everything after the first slash. + # Strip everything after the first slash. ZFS_RPOOL="${ZFS_BOOTFS%%/*}" fi diff --git a/contrib/pyzfs/docs/source/conf.py b/contrib/pyzfs/docs/source/conf.py index 4ffd7c93e5b..4bbb938b629 100644 --- a/contrib/pyzfs/docs/source/conf.py +++ b/contrib/pyzfs/docs/source/conf.py @@ -291,7 +291,7 @@ ####################### # Neutralize effects of function wrapping on documented signatures. -# The affected signatures could be explcitly placed into the +# The affected signatures could be explicitly placed into the # documentation (either in .rst files or as a first line of a # docstring). import functools diff --git a/contrib/pyzfs/libzfs_core/_libzfs_core.py b/contrib/pyzfs/libzfs_core/_libzfs_core.py index 5c8a1f5e690..ed3ea3201c2 100644 --- a/contrib/pyzfs/libzfs_core/_libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/_libzfs_core.py @@ -300,7 +300,7 @@ def lzc_destroy_snaps(snaps, defer): Typical error is :exc:`SnapshotIsCloned` if `defer` is `False`. 
The snapshot names are validated quite loosely and invalid names are - typically ignored as nonexisiting snapshots. + typically ignored as nonexisting snapshots. A snapshot name referring to a filesystem that doesn't exist is ignored. @@ -470,7 +470,7 @@ def lzc_hold(holds, fd=None): Holds for snapshots which don't exist will be skipped and have an entry added to the return value, but will not cause an overall failure. No exceptions is raised if all holds, for snapshots that existed, were - succesfully created. + successfully created. Otherwise :exc:`.HoldFailure` exception is raised and no holds will be created. :attr:`.HoldFailure.errors` may contain a single element for an error that @@ -654,7 +654,7 @@ def lzc_send_space(snapname, fromsnap=None, flags=None): should be done. :param fromsnap: the optional starting snapshot name. If not `None` then an incremental stream size is estimated, otherwise - a full stream is esimated. + a full stream is estimated. :type fromsnap: `bytes` or `None` :param flags: the flags that control what enhanced features can be used in the stream. @@ -1178,11 +1178,11 @@ def receive_header(fd): the type of the dataset for which the stream has been created (volume, filesystem) ''' - # read sizeof(dmu_replay_record_t) bytes directly into the memort backing + # read sizeof(dmu_replay_record_t) bytes directly into the memory backing # 'record' record = _ffi.new("dmu_replay_record_t *") _ffi.buffer(record)[:] = os.read(fd, _ffi.sizeof(record[0])) - # get drr_begin member and its representation as a Pythn dict + # get drr_begin member and its representation as a Python dict drr_begin = record.drr_u.drr_begin header = {} for field, descr in _ffi.typeof(drr_begin).fields: @@ -1704,7 +1704,7 @@ def lzc_set_props(name, prop, val): # As the extended API is not committed yet, the names of the new interfaces # are not settled down yet. 
# It's not clear if atomically setting multiple properties is an achievable -# goal and an interface acting on mutiple entities must do so atomically +# goal and an interface acting on multiple entities must do so atomically # by convention. # Being able to set a single property at a time is sufficient for ClusterHQ. lzc_set_prop = lzc_set_props @@ -1741,7 +1741,7 @@ def lzc_list(name, options): Absence of this option implies all types. The first of the returned file descriptors can be used to - read the listing in a binary encounded format. The data is + read the listing in a binary encoded format. The data is a series of variable sized records each starting with a fixed size header, the header is followed by a serialized ``nvlist``. Each record describes a single element and contains the element's diff --git a/contrib/pyzfs/libzfs_core/_nvlist.py b/contrib/pyzfs/libzfs_core/_nvlist.py index fe4239a3c06..dc6d820bdea 100644 --- a/contrib/pyzfs/libzfs_core/_nvlist.py +++ b/contrib/pyzfs/libzfs_core/_nvlist.py @@ -113,7 +113,7 @@ def packed_nvlist_out(packed_nvlist, packed_size): :param bytes packed_nvlist: packed nvlist_t. :param int packed_size: nvlist_t packed size. - :return: an `dict` of values representing the data containted by nvlist_t. + :return: an `dict` of values representing the data contained by nvlist_t. 
:rtype: dict """ props = {} diff --git a/contrib/pyzfs/libzfs_core/exceptions.py b/contrib/pyzfs/libzfs_core/exceptions.py index f465cd3d930..f8a775433b3 100644 --- a/contrib/pyzfs/libzfs_core/exceptions.py +++ b/contrib/pyzfs/libzfs_core/exceptions.py @@ -77,7 +77,7 @@ def __str__(self): ZFSError.__str__(self), len(self.errors), self.suppressed_count) def __repr__(self): - return "%s(%r, %r, errors=%r, supressed=%r)" % ( + return "%s(%r, %r, errors=%r, suppressed=%r)" % ( self.__class__.__name__, self.errno, self.message, self.errors, self.suppressed_count) @@ -372,7 +372,7 @@ def __init__(self, name): class QuotaExceeded(ZFSError): errno = errno.EDQUOT - message = "Quouta exceeded" + message = "Quota exceeded" def __init__(self, name): self.name = name diff --git a/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py b/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py index 25f20a4aeeb..8279cefc46d 100644 --- a/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py @@ -1913,7 +1913,7 @@ def test_recv_incremental(self): filecmp.cmp( os.path.join(mnt1, name), os.path.join(mnt2, name), False)) - # This test case fails unless unless a patch from + # This test case fails unless a patch from # https://clusterhq.atlassian.net/browse/ZFS-20 # is applied to libzfs_core, otherwise it succeeds. 
@unittest.skip("fails with unpatched libzfs_core") @@ -2160,7 +2160,7 @@ def test_recv_incremental_non_clone_but_set_origin(self): with streams(srcfs, src1, src2) as (_, (full, incr)): lzc.lzc_receive(dst1, full.fileno()) lzc.lzc_snapshot([dst_snap]) - # becase cannot receive incremental and set origin on a non-clone + # because cannot receive incremental and set origin on a non-clone with self.assertRaises(lzc_exc.BadStream): lzc.lzc_receive(dst2, incr.fileno(), origin=dst1) @@ -2375,7 +2375,7 @@ def test_force_recv_full_existing_modified_mounted_fs(self): for i in range(1024): f.write(b'x' * 1024) lzc.lzc_receive(dst, stream.fileno(), force=True) - # The temporary file dissappears and any access, even close(), + # The temporary file disappears and any access, even close(), # results in EIO. self.assertFalse(os.path.exists(f.name)) with self.assertRaises(IOError): @@ -2462,7 +2462,7 @@ def test_force_recv_incremental_modified_mounted_fs(self): for i in range(1024): f.write(b'x' * 1024) lzc.lzc_receive(dst2, incr.fileno(), force=True) - # The temporary file dissappears and any access, even close(), + # The temporary file disappears and any access, even close(), # results in EIO. 
self.assertFalse(os.path.exists(f.name)) with self.assertRaises(IOError): From 10e8abf1aff4488fd07b384cb8bd165990ffb97b Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 18:46:52 +0200 Subject: [PATCH 195/325] Fix typos in etc/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9236 --- etc/init.d/zfs-functions.in | 4 ++-- etc/init.d/zfs-import.in | 4 ++-- etc/zfs/vdev_id.conf.sas_direct.example | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/etc/init.d/zfs-functions.in b/etc/init.d/zfs-functions.in index d65c79dcfd3..043f1b07398 100644 --- a/etc/init.d/zfs-functions.in +++ b/etc/init.d/zfs-functions.in @@ -72,7 +72,7 @@ elif type einfo > /dev/null 2>&1 ; then # zfs_log_progress_msg() { echo -n "$1"; } zfs_log_progress_msg() { echo -n; } else - # Unknown - simple substitues. + # Unknown - simple substitutes. zfs_log_begin_msg() { echo -n "$1"; } zfs_log_end_msg() { ret=$1 @@ -283,7 +283,7 @@ checksystem() # Called with zfs=(off|no|0) - bail because we don't # want anything import, mounted or shared. # HOWEVER, only do this if we're called at the boot up - # (from init), not if we're running interactivly (as in + # (from init), not if we're running interactively (as in # from the shell - we know what we're doing). [ -n "$init" ] && exit 3 fi diff --git a/etc/init.d/zfs-import.in b/etc/init.d/zfs-import.in index 420d2e8a7a4..47c957baac4 100644 --- a/etc/init.d/zfs-import.in +++ b/etc/init.d/zfs-import.in @@ -90,7 +90,7 @@ do_import_all_visible() already_imported=$(find_pools "$ZPOOL" list -H -oname) available_pools=$(find_pools "$ZPOOL" import) - # Just in case - seen it happen (that a pool isn't visable/found + # Just in case - seen it happen (that a pool isn't visible/found # with a simple "zpool import" but only when using the "-d" # option or setting ZPOOL_IMPORT_PATH). 
if [ -d "/dev/disk/by-id" ] @@ -187,7 +187,7 @@ do_import_all_visible() # Needs to be exported for "zpool" to catch it. [ -n "$ZPOOL_IMPORT_PATH" ] && export ZPOOL_IMPORT_PATH - # Mount all availible pools (except those set in ZFS_POOL_EXCEPTIONS. + # Mount all available pools (except those set in ZFS_POOL_EXCEPTIONS. # # If not interactive (run from init - variable init='/sbin/init') # we get ONE line for all pools being imported, with just a dot diff --git a/etc/zfs/vdev_id.conf.sas_direct.example b/etc/zfs/vdev_id.conf.sas_direct.example index 0a6f130cb2d..d17ed149d89 100644 --- a/etc/zfs/vdev_id.conf.sas_direct.example +++ b/etc/zfs/vdev_id.conf.sas_direct.example @@ -2,7 +2,7 @@ multipath no topology sas_direct phys_per_port 4 -# Additionally create /dev/by-enclousure/ symlinks for enclosure devices +# Additionally create /dev/by-enclosure/ symlinks for enclosure devices enclosure_symlinks yes # PCI_ID HBA PORT CHANNEL NAME From bcfa65802cbfdc46c5a2157b4db2d79f5a9a6278 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 18:53:15 +0200 Subject: [PATCH 196/325] Fix typos in include/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9238 --- include/linux/vfs_compat.h | 2 +- include/spl/sys/kmem_cache.h | 2 +- include/sys/arc.h | 2 +- include/sys/arc_impl.h | 2 +- include/sys/avl.h | 4 ++-- include/sys/dmu.h | 2 +- include/sys/efi_partition.h | 4 ++-- include/sys/fs/zfs.h | 4 ++-- include/sys/lua/luaconf.h | 2 +- include/sys/sa.h | 2 +- include/sys/txg_impl.h | 4 ++-- include/sys/vdev_raidz_impl.h | 2 +- include/sys/zcp.h | 2 +- include/sys/zfs_acl.h | 2 +- include/sys/zfs_vfsops.h | 4 ++-- include/sys/zil.h | 4 ++-- include/sys/zio_crypt.h | 6 +++--- include/sys/zio_impl.h | 2 +- 18 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/linux/vfs_compat.h b/include/linux/vfs_compat.h index 04a2c2b879f..28b454133c6 100644 --- a/include/linux/vfs_compat.h +++ 
b/include/linux/vfs_compat.h @@ -36,7 +36,7 @@ * 2.6.28 API change, * Added insert_inode_locked() helper function, prior to this most callers * used insert_inode_hash(). The older method doesn't check for collisions - * in the inode_hashtable but it still acceptible for use. + * in the inode_hashtable but it still acceptable for use. */ #ifndef HAVE_INSERT_INODE_LOCKED static inline int diff --git a/include/spl/sys/kmem_cache.h b/include/spl/sys/kmem_cache.h index bb413207def..4ee7bcae07e 100644 --- a/include/spl/sys/kmem_cache.h +++ b/include/spl/sys/kmem_cache.h @@ -30,7 +30,7 @@ /* * Slab allocation interfaces. The SPL slab differs from the standard * Linux SLAB or SLUB primarily in that each cache may be backed by slabs - * allocated from the physical or virtal memory address space. The virtual + * allocated from the physical or virtual memory address space. The virtual * slabs allow for good behavior when allocation large objects of identical * size. This slab implementation also supports both constructors and * destructors which the Linux slab does not. diff --git a/include/sys/arc.h b/include/sys/arc.h index dc2fd03647f..d7bb44b0200 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -187,7 +187,7 @@ typedef enum arc_buf_contents { } arc_buf_contents_t; /* - * The following breakdows of arc_size exist for kstat only. + * The following breakdowns of arc_size exist for kstat only. 
*/ typedef enum arc_space_type { ARC_SPACE_DATA, diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index cd42c0c01a2..c8f551db731 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -39,7 +39,7 @@ extern "C" { * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) * ARC_mru - recently used, currently cached - * ARC_mru_ghost - recentely used, no longer in cache + * ARC_mru_ghost - recently used, no longer in cache * ARC_mfu - frequently used, currently cached * ARC_mfu_ghost - frequently used, no longer in cache * ARC_l2c_only - exists in L2ARC but not other states diff --git a/include/sys/avl.h b/include/sys/avl.h index 206b539fab5..962e8b1cfb6 100644 --- a/include/sys/avl.h +++ b/include/sys/avl.h @@ -97,7 +97,7 @@ extern "C" { * * 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes. * Note that once you use avl_destroy_nodes(), you can no longer - * use any routine except avl_destroy_nodes() and avl_destoy(). + * use any routine except avl_destroy_nodes() and avl_destroy(). * * 4. Use avl_destroy() to destroy the AVL tree itself. * @@ -144,7 +144,7 @@ typedef uintptr_t avl_index_t; * user data structure which must contain a field of type avl_node_t. * * Also assume the user data structures looks like: - * stuct my_type { + * struct my_type { * ... * avl_node_t my_link; * ... diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 88c83617178..dd8d12376cc 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -465,7 +465,7 @@ int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, /* * Set the data blocksize for an object. * - * The object cannot have any blocks allcated beyond the first. If + * The object cannot have any blocks allocated beyond the first. If * the first block is allocated already, the new size must be greater * than the current block size. If these conditions are not met, * ENOTSUP will be returned. 
diff --git a/include/sys/efi_partition.h b/include/sys/efi_partition.h index 684b3e588a1..88bdfd2b1ca 100644 --- a/include/sys/efi_partition.h +++ b/include/sys/efi_partition.h @@ -297,11 +297,11 @@ typedef struct efi_gpe { * checksums, and perform any necessary byte-swapping to the on-disk * format. */ -/* Solaris library abstraction for EFI partitons */ +/* Solaris library abstraction for EFI partitions */ typedef struct dk_part { diskaddr_t p_start; /* starting LBA */ diskaddr_t p_size; /* size in blocks */ - struct uuid p_guid; /* partion type GUID */ + struct uuid p_guid; /* partition type GUID */ ushort_t p_tag; /* converted to part'n type GUID */ ushort_t p_flag; /* attributes */ char p_name[EFI_PART_NAME_LEN]; /* partition name */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index c167a594a7d..6b780724245 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -954,7 +954,7 @@ typedef struct pool_scan_stat { /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ - uint64_t pss_pass_scrub_pause; /* pause time of a scurb pass */ + uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */ /* cumulative time scrub spent paused, needed for rate calculation */ uint64_t pss_pass_scrub_spent_paused; uint64_t pss_pass_issued; /* issued bytes per scan pass */ @@ -1028,7 +1028,7 @@ typedef struct vdev_stat { uint64_t vs_fragmentation; /* device fragmentation */ uint64_t vs_initialize_bytes_done; /* bytes initialized */ uint64_t vs_initialize_bytes_est; /* total bytes to initialize */ - uint64_t vs_initialize_state; /* vdev_initialzing_state_t */ + uint64_t vs_initialize_state; /* vdev_initializing_state_t */ uint64_t vs_initialize_action_time; /* time_t */ uint64_t vs_checkpoint_space; /* checkpoint-consumed space */ uint64_t vs_resilver_deferred; /* resilver deferred */ diff --git a/include/sys/lua/luaconf.h b/include/sys/lua/luaconf.h index 
302c57a8c4b..fa7861336fc 100644 --- a/include/sys/lua/luaconf.h +++ b/include/sys/lua/luaconf.h @@ -495,7 +495,7 @@ extern int64_t lcompat_pow(int64_t, int64_t); ** a single double value, using NaN values to represent non-number ** values. The trick only works on 32-bit machines (ints and pointers ** are 32-bit values) with numbers represented as IEEE 754-2008 doubles -** with conventional endianess (12345678 or 87654321), in CPUs that do +** with conventional endianness (12345678 or 87654321), in CPUs that do ** not produce signaling NaN values (all NaNs are quiet). */ diff --git a/include/sys/sa.h b/include/sys/sa.h index 50b90622164..432e0bc415c 100644 --- a/include/sys/sa.h +++ b/include/sys/sa.h @@ -51,7 +51,7 @@ typedef uint16_t sa_attr_type_t; typedef struct sa_attr_reg { char *sa_name; /* attribute name */ uint16_t sa_length; - sa_bswap_type_t sa_byteswap; /* bswap functon enum */ + sa_bswap_type_t sa_byteswap; /* bswap function enum */ sa_attr_type_t sa_attr; /* filled in during registration */ } sa_attr_reg_t; diff --git a/include/sys/txg_impl.h b/include/sys/txg_impl.h index 4e05214919d..047d51b94c6 100644 --- a/include/sys/txg_impl.h +++ b/include/sys/txg_impl.h @@ -43,7 +43,7 @@ extern "C" { * the number of active transaction holds (tc_count). As transactions * are assigned into a transaction group the appropriate tc_count is * incremented to indicate that there are pending changes that have yet - * to quiesce. Consumers evenutally call txg_rele_to_sync() to decrement + * to quiesce. Consumers eventually call txg_rele_to_sync() to decrement * the tc_count. A transaction group is not considered quiesced until all * tx_cpu structures have reached a tc_count of zero. * @@ -78,7 +78,7 @@ struct tx_cpu { /* * The tx_state structure maintains the state information about the different - * stages of the pool's transcation groups. A per pool tx_state structure + * stages of the pool's transaction groups. 
A per pool tx_state structure * is used to track this information. The tx_state structure also points to * an array of tx_cpu structures (described above). Although the tx_sync_lock * is used to protect the members of this structure, it is not used to diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h index 0799ed19dfc..94960ba957c 100644 --- a/include/sys/vdev_raidz_impl.h +++ b/include/sys/vdev_raidz_impl.h @@ -158,7 +158,7 @@ extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl; * * raidz_parity Returns parity of the RAIDZ block * raidz_ncols Returns number of columns the block spans - * raidz_nbigcols Returns number of big columns columns + * raidz_nbigcols Returns number of big columns * raidz_col_p Returns pointer to a column * raidz_col_size Returns size of a column * raidz_big_size Returns size of big columns diff --git a/include/sys/zcp.h b/include/sys/zcp.h index b720d863779..5cc520da5c5 100644 --- a/include/sys/zcp.h +++ b/include/sys/zcp.h @@ -149,7 +149,7 @@ typedef struct zcp_arg { /* * The name of this argument. For keyword arguments this is the name * functions will use to set the argument. For positional arguments - * the name has no programatic meaning, but will appear in error + * the name has no programmatic meaning, but will appear in error * messages and help output. */ const char *za_name; diff --git a/include/sys/zfs_acl.h b/include/sys/zfs_acl.h index 6d3db504160..747f4e57e2a 100644 --- a/include/sys/zfs_acl.h +++ b/include/sys/zfs_acl.h @@ -62,7 +62,7 @@ struct znode_phys; /* * All ACEs have a common hdr. For * owner@, group@, and everyone@ this is all - * thats needed. + * that's needed. */ typedef struct zfs_ace_hdr { uint16_t z_type; diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index 42f534f5db6..457d027baf9 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -46,7 +46,7 @@ struct znode; /* * This structure emulates the vfs_t from other platforms. 
It's purpose - * is to faciliate the handling of mount options and minimize structural + * is to facilitate the handling of mount options and minimize structural * differences between the platforms. */ typedef struct vfs { @@ -105,7 +105,7 @@ struct zfsvfs { list_t z_all_znodes; /* all znodes in the fs */ uint64_t z_nr_znodes; /* number of znodes in the fs */ unsigned long z_rollback_time; /* last online rollback time */ - unsigned long z_snap_defer_time; /* last snapshot unmount deferal */ + unsigned long z_snap_defer_time; /* last snapshot unmount deferral */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ arc_prune_t *z_arc_prune; /* called by ARC to prune caches */ struct inode *z_ctldir; /* .zfs directory inode */ diff --git a/include/sys/zil.h b/include/sys/zil.h index cfa5e399550..6b038a9dd22 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -80,7 +80,7 @@ typedef struct zil_header { * Log blocks are chained together. Originally they were chained at the * end of the block. For performance reasons the chain was moved to the * beginning of the block which allows writes for only the data being used. - * The older position is supported for backwards compatability. + * The older position is supported for backwards compatibility. * * The zio_eck_t contains a zec_cksum which for the intent log is * the sequence number of this log block. A seq of 0 is invalid. @@ -421,7 +421,7 @@ typedef struct zil_stats { /* * Number of transactions (reads, writes, renames, etc.) - * that have been commited. + * that have been committed. 
*/ kstat_named_t zil_itx_count; diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h index d54e2fe192f..a029127914b 100644 --- a/include/sys/zio_crypt.h +++ b/include/sys/zio_crypt.h @@ -55,7 +55,7 @@ typedef struct zio_crypt_info { /* length of the encryption key */ size_t ci_keylen; - /* human-readable name of the encryption alforithm */ + /* human-readable name of the encryption algorithm */ char *ci_name; } zio_crypt_info_t; @@ -78,7 +78,7 @@ typedef struct zio_crypt_key { /* buffer for hmac key */ uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN]; - /* buffer for currrent encryption key derived from master key */ + /* buffer for current encryption key derived from master key */ uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN]; /* current 64 bit salt for deriving an encryption key */ @@ -99,7 +99,7 @@ typedef struct zio_crypt_key { /* template of hmac key for illumos crypto api */ crypto_ctx_template_t zk_hmac_tmpl; - /* lock for changing the salt and dependant values */ + /* lock for changing the salt and dependent values */ krwlock_t zk_salt_lock; } zio_crypt_key_t; diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index fbbe06eb04f..8ca12463176 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -87,7 +87,7 @@ extern "C" { * * NOP Write: * The NOP write feature is performed by the ZIO_STAGE_NOP_WRITE stage - * and is added to an existing write pipeline if a crypographically + * and is added to an existing write pipeline if a cryptographically * secure checksum (i.e. SHA256) is enabled and compression is turned on. * The NOP write stage will compare the checksums of the current data * on-disk (level-0 blocks only) and the data that is currently being written. 
From 8c01eb1c4ae4aa4c694f775b498f0d54475f80b3 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 23:26:07 +0200 Subject: [PATCH 197/325] Fix typos in modules/icp/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9239 --- module/icp/algs/skein/skein_block.c | 6 +++--- module/icp/api/kcf_ctxops.c | 4 ++-- module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl | 2 +- module/icp/asm-x86_64/aes/aesopt.h | 2 +- module/icp/core/kcf_mech_tabs.c | 2 +- module/icp/core/kcf_sched.c | 12 ++++++------ module/icp/illumos-crypto.c | 2 +- module/icp/include/sys/crypto/impl.h | 2 +- module/icp/include/sys/crypto/sched_impl.h | 6 +++--- module/icp/include/sys/crypto/spi.h | 2 +- module/icp/os/modhash.c | 2 +- module/icp/spi/kcf_spi.c | 2 +- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c index 6d85cb7d9e9..7ba165a4851 100644 --- a/module/icp/algs/skein/skein_block.c +++ b/module/icp/algs/skein/skein_block.c @@ -159,7 +159,7 @@ Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr, ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); - /* loop thru it */ + /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) #endif { @@ -385,7 +385,7 @@ Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr, ts[r + (R)+2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); - /* loop thru it */ + /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) #endif /* end of looped code definitions */ { @@ -667,7 +667,7 @@ Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr, ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); - /* loop thru it */ + /* loop through it */ for (r = 1; r <= 2 * RCNT; r += 2 * 
SKEIN_UNROLL_1024) #endif { diff --git a/module/icp/api/kcf_ctxops.c b/module/icp/api/kcf_ctxops.c index b9b9cb74e04..21b0977d363 100644 --- a/module/icp/api/kcf_ctxops.c +++ b/module/icp/api/kcf_ctxops.c @@ -63,7 +63,7 @@ * * Returns: * CRYPTO_SUCCESS when the context template is successfully created. - * CRYPTO_HOST_MEMEORY: mem alloc failure + * CRYPTO_HOST_MEMORY: mem alloc failure * CRYPTO_ARGUMENTS_BAD: NULL storage for the ctx template. * RYPTO_MECHANISM_INVALID: invalid mechanism 'mech'. */ @@ -123,7 +123,7 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key, * crypto_create_ctx_template() * * Description: - * Frees the inbedded crypto_spi_ctx_template_t, then the + * Frees the embedded crypto_spi_ctx_template_t, then the * kcf_ctx_template_t. * * Context: diff --git a/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl b/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl index a2c4adcbe6a..92c9e196a31 100644 --- a/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl +++ b/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl @@ -101,7 +101,7 @@ * must display the following acknowledgement: * "This product includes cryptographic software written by * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library + * The word 'cryptographic' can be left out if the routines from the library * being used are not cryptographic related :-). * 4. If you include any Windows specific code (or a derivative thereof) from * the apps directory (application code) you must include an acknowledgement: diff --git a/module/icp/asm-x86_64/aes/aesopt.h b/module/icp/asm-x86_64/aes/aesopt.h index 6aa61db8275..472111f96e5 100644 --- a/module/icp/asm-x86_64/aes/aesopt.h +++ b/module/icp/asm-x86_64/aes/aesopt.h @@ -327,7 +327,7 @@ extern "C" { * On some systems speed will be improved by aligning the AES large lookup * tables on particular boundaries. 
This define should be set to a power of * two giving the desired alignment. It can be left undefined if alignment - * is not needed. This option is specific to the Micrsoft VC++ compiler - + * is not needed. This option is specific to the Microsoft VC++ compiler - * it seems to sometimes cause trouble for the VC++ version 6 compiler. */ diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c index 741dae7a748..2642b317d69 100644 --- a/module/icp/core/kcf_mech_tabs.c +++ b/module/icp/core/kcf_mech_tabs.c @@ -103,7 +103,7 @@ kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = { * Per-algorithm internal thresholds for the minimum input size of before * offloading to hardware provider. * Dispatching a crypto operation to a hardware provider entails paying the - * cost of an additional context switch. Measurments with Sun Accelerator 4000 + * cost of an additional context switch. Measurements with Sun Accelerator 4000 * shows that 512-byte jobs or smaller are better handled in software. * There is room for refinement here. * diff --git a/module/icp/core/kcf_sched.c b/module/icp/core/kcf_sched.c index da2346f7ec2..c8c2bbd42b9 100644 --- a/module/icp/core/kcf_sched.c +++ b/module/icp/core/kcf_sched.c @@ -182,7 +182,7 @@ kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx, * reached, signal the creator thread for more threads. * * If the two conditions above are not met, we don't need to do - * any thing. The request will be picked up by one of the + * anything. The request will be picked up by one of the * worker threads when it becomes available. */ static int @@ -1182,7 +1182,7 @@ kcf_aop_done(kcf_areq_node_t *areq, int error) /* * Handle recoverable errors. This has to be done first - * before doing any thing else in this routine so that + * before doing anything else in this routine so that * we do not change the state of the request. 
*/ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { @@ -1432,7 +1432,7 @@ crypto_cancel_req(crypto_req_id_t id) /* * There is no interface to remove an entry * once it is on the taskq. So, we do not do - * any thing for a hardware provider. + * anything for a hardware provider. */ break; default: @@ -1535,7 +1535,7 @@ kcf_misc_kstat_update(kstat_t *ksp, int rw) } /* - * Allocate and initiatize a kcf_dual_req, used for saving the arguments of + * Allocate and initialize a kcf_dual_req, used for saving the arguments of * a dual operation or an atomic operation that has to be internally * simulated with multiple single steps. * crq determines the memory allocation flags. @@ -1551,7 +1551,7 @@ kcf_alloc_req(crypto_call_req_t *crq) if (kcr == NULL) return (NULL); - /* Copy the whole crypto_call_req struct, as it isn't persistant */ + /* Copy the whole crypto_call_req struct, as it isn't persistent */ if (crq != NULL) kcr->kr_callreq = *crq; else @@ -1579,7 +1579,7 @@ kcf_next_req(void *next_req_arg, int status) kcf_provider_desc_t *pd = NULL; crypto_dual_data_t *ct = NULL; - /* Stop the processing if an error occured at this step */ + /* Stop the processing if an error occurred at this step */ if (error != CRYPTO_SUCCESS) { out: areq->an_reqarg = next_req->kr_callreq; diff --git a/module/icp/illumos-crypto.c b/module/icp/illumos-crypto.c index c2fcf1ff729..3c5ef439394 100644 --- a/module/icp/illumos-crypto.c +++ b/module/icp/illumos-crypto.c @@ -93,7 +93,7 @@ * will use the generic implementation. * * 7) Removing sha384 and sha512 code: The sha code was actually very - * wasy to port. However, the generic sha384 and sha512 code actually + * easy to port. However, the generic sha384 and sha512 code actually * exceeds the stack size on arm and powerpc architectures. In an effort * to remove warnings, this code was removed. 
* diff --git a/module/icp/include/sys/crypto/impl.h b/module/icp/include/sys/crypto/impl.h index 258cb5fedcd..0f37f3f6353 100644 --- a/module/icp/include/sys/crypto/impl.h +++ b/module/icp/include/sys/crypto/impl.h @@ -237,7 +237,7 @@ typedef struct kcf_provider_list { struct kcf_provider_desc *pl_provider; } kcf_provider_list_t; -/* atomic operations in linux implictly form a memory barrier */ +/* atomic operations in linux implicitly form a memory barrier */ #define membar_exit() /* diff --git a/module/icp/include/sys/crypto/sched_impl.h b/module/icp/include/sys/crypto/sched_impl.h index 32ffa774957..85ea0ba1d09 100644 --- a/module/icp/include/sys/crypto/sched_impl.h +++ b/module/icp/include/sys/crypto/sched_impl.h @@ -381,7 +381,7 @@ typedef struct kcf_pool { /* * cv & lock for the condition where more threads need to be - * created. kp_user_lock also protects the three fileds above. + * created. kp_user_lock also protects the three fields above. */ kcondvar_t kp_user_cv; /* Creator cond. variable */ kmutex_t kp_user_lock; /* Creator lock */ @@ -448,13 +448,13 @@ typedef struct kcf_ntfy_elem { * The following values are based on the assumption that it would * take around eight cpus to load a hardware provider (This is true for * at least one product) and a kernel client may come from different - * low-priority interrupt levels. We will have CYRPTO_TASKQ_MIN number + * low-priority interrupt levels. We will have CRYPTO_TASKQ_MIN number * of cached taskq entries. The CRYPTO_TASKQ_MAX number is based on * a throughput of 1GB/s using 512-byte buffers. These are just * reasonable estimates and might need to change in future. 
*/ #define CRYPTO_TASKQ_THREADS 8 -#define CYRPTO_TASKQ_MIN 64 +#define CRYPTO_TASKQ_MIN 64 #define CRYPTO_TASKQ_MAX 2 * 1024 * 1024 extern int crypto_taskq_threads; diff --git a/module/icp/include/sys/crypto/spi.h b/module/icp/include/sys/crypto/spi.h index 0aae9181adc..2c62b570665 100644 --- a/module/icp/include/sys/crypto/spi.h +++ b/module/icp/include/sys/crypto/spi.h @@ -699,7 +699,7 @@ typedef struct crypto_provider_info { /* * Provider status passed by a provider to crypto_provider_notification(9F) - * and returned by the provider_stauts(9E) entry point. + * and returned by the provider_status(9E) entry point. */ #define CRYPTO_PROVIDER_READY 0 #define CRYPTO_PROVIDER_BUSY 1 diff --git a/module/icp/os/modhash.c b/module/icp/os/modhash.c index 497e8439666..5e216ed6a04 100644 --- a/module/icp/os/modhash.c +++ b/module/icp/os/modhash.c @@ -48,7 +48,7 @@ * The number returned need _not_ be between 0 and nchains. The mod_hash * code will take care of doing that. The second argument (after the * key) to the hashing function is a void * that represents - * hash_alg_data-- this is provided so that the hashing algrorithm can + * hash_alg_data-- this is provided so that the hashing algorithm can * maintain some state across calls, or keep algorithm-specific * constants associated with the hash table. * diff --git a/module/icp/spi/kcf_spi.c b/module/icp/spi/kcf_spi.c index 0a6e38df862..e438b58105b 100644 --- a/module/icp/spi/kcf_spi.c +++ b/module/icp/spi/kcf_spi.c @@ -40,7 +40,7 @@ * minalloc and maxalloc values to be used for taskq_create(). 
*/ int crypto_taskq_threads = CRYPTO_TASKQ_THREADS; -int crypto_taskq_minalloc = CYRPTO_TASKQ_MIN; +int crypto_taskq_minalloc = CRYPTO_TASKQ_MIN; int crypto_taskq_maxalloc = CRYPTO_TASKQ_MAX; static void remove_provider(kcf_provider_desc_t *); From 48d8b249c96f99a52c5c50b3e23ae6d18e14289f Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Fri, 30 Aug 2019 23:32:18 +0200 Subject: [PATCH 198/325] Fix typos in module/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9241 --- module/avl/avl.c | 2 +- module/lua/llimits.h | 2 +- module/nvpair/nvpair.c | 6 +++--- module/spl/spl-condvar.c | 4 ++-- module/spl/spl-generic.c | 4 ++-- module/spl/spl-kmem-cache.c | 4 ++-- module/spl/spl-tsd.c | 4 ++-- module/zcommon/zfeature_common.c | 2 +- module/zcommon/zfs_namecheck.c | 2 +- module/zcommon/zpool_prop.c | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/module/avl/avl.c b/module/avl/avl.c index 736dcee8457..1d2843f0e71 100644 --- a/module/avl/avl.c +++ b/module/avl/avl.c @@ -159,7 +159,7 @@ avl_walk(avl_tree_t *tree, void *oldnode, int left) node = node->avl_child[right]) ; /* - * Otherwise, return thru left children as far as we can. + * Otherwise, return through left children as far as we can. 
*/ } else { for (;;) { diff --git a/module/lua/llimits.h b/module/lua/llimits.h index eee8f0c2d53..2126a14648d 100644 --- a/module/lua/llimits.h +++ b/module/lua/llimits.h @@ -98,7 +98,7 @@ typedef LUAI_UACNUMBER l_uacNumber; /* ** non-return type ** -** Supress noreturn attribute in kernel builds to avoid objtool check warnings +** Suppress noreturn attribute in kernel builds to avoid objtool check warnings */ #if defined(__GNUC__) && !defined(_KERNEL) #define l_noret void __attribute__((noreturn)) diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c index 5f6423ccce7..c5bd98ebd05 100644 --- a/module/nvpair/nvpair.c +++ b/module/nvpair/nvpair.c @@ -1872,7 +1872,7 @@ nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...) * (given 'ret' is non-NULL). If 'sep' is specified then 'name' will penitrate * multiple levels of embedded nvlists, with 'sep' as the separator. As an * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or - * "a.d[3].e[1]". This matches the C syntax for array embed (for convience, + * "a.d[3].e[1]". This matches the C syntax for array embed (for convenience, * code also supports "a.d[3]e[1]" syntax). * * If 'ip' is non-NULL and the last name component is an array, return the @@ -3105,7 +3105,7 @@ nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) * * An xdr packed nvlist is encoded as: * - * - encoding methode and host endian (4 bytes) + * - encoding method and host endian (4 bytes) * - nvl_version (4 bytes) * - nvl_nvflag (4 bytes) * @@ -3499,7 +3499,7 @@ nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) * the strings. These pointers are not encoded into the packed xdr buffer. * * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are - * of length 0, then each string is endcoded in xdr format as a single word. + * of length 0, then each string is encoded in xdr format as a single word. * Therefore when expanded to an nvpair there will be 2.25 word used for * each string. 
(a int64_t allocated for pointer usage, and a single char * for the null termination.) diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 664fae1e719..3cc33da6298 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -431,8 +431,8 @@ __cv_signal(kcondvar_t *cvp) /* * All waiters are added with WQ_FLAG_EXCLUSIVE so only one - * waiter will be set runable with each call to wake_up(). - * Additionally wake_up() holds a spin_lock assoicated with + * waiter will be set runnable with each call to wake_up(). + * Additionally wake_up() holds a spin_lock associated with * the wait queue to ensure we don't race waking up processes. */ if (atomic_read(&cvp->cv_waiters) > 0) diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index 3c5ef60bd1a..1deb2f444cd 100644 --- a/module/spl/spl-generic.c +++ b/module/spl/spl-generic.c @@ -79,7 +79,7 @@ EXPORT_SYMBOL(p0); * to generate words larger than 128 bits will paradoxically be limited to * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1` * 128-bit words and selecting the first will implicitly select the second. If - * a caller finds this behavior undesireable, random_get_bytes() should be used + * a caller finds this behavior undesirable, random_get_bytes() should be used * instead. * * XXX: Linux interrupt handlers that trigger within the critical section @@ -207,7 +207,7 @@ nlz64(uint64_t x) /* * Newer kernels have a div_u64() function but we define our own - * to simplify portibility between kernel versions. + * to simplify portability between kernel versions. 
*/ static inline uint64_t __div_u64(uint64_t u, uint32_t v) diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index 44e112cccbd..b39867b0374 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -185,7 +185,7 @@ MODULE_PARM_DESC(spl_kmem_cache_kmem_threads, struct list_head spl_kmem_cache_list; /* List of caches */ struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ -taskq_t *spl_kmem_cache_taskq; /* Task queue for ageing / reclaim */ +taskq_t *spl_kmem_cache_taskq; /* Task queue for aging / reclaim */ static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj); @@ -995,7 +995,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, #if defined(SLAB_USERCOPY) /* * Required for PAX-enabled kernels if the slab is to be - * used for coping between user and kernel space. + * used for copying between user and kernel space. */ slabflags |= SLAB_USERCOPY; #endif diff --git a/module/spl/spl-tsd.c b/module/spl/spl-tsd.c index 4c800292ae7..14342d5a618 100644 --- a/module/spl/spl-tsd.c +++ b/module/spl/spl-tsd.c @@ -42,7 +42,7 @@ * type is entry is called a 'key' entry and it is added to the hash during * tsd_create(). It is used to store the address of the destructor function * and it is used as an anchor point. All tsd entries which use the same - * key will be linked to this entry. This is used during tsd_destory() to + * key will be linked to this entry. This is used during tsd_destroy() to * quickly call the destructor function for all tsd associated with the key. * The 'key' entry may be looked up with tsd_hash_search() by passing the * key you wish to lookup and DTOR_PID constant as the pid. @@ -269,7 +269,7 @@ tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor) * @table: hash table * @pid: search pid * - * For every process these is a single entry in the hash which is used + * For every process there is a single entry in the hash which is used * as anchor. 
All other thread specific entries for this process are * linked to this anchor via the 'he_pid_list' list head. */ diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index dc0c1161f8b..9f74f0fbd26 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -485,7 +485,7 @@ zpool_feature_init(void) zfeature_register(SPA_FEATURE_RESILVER_DEFER, "com.datto:resilver_defer", "resilver_defer", - "Support for defering new resilvers when one is already running.", + "Support for deferring new resilvers when one is already running.", ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL); } diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index b1e0de6d818..bf5b77912a1 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -74,7 +74,7 @@ get_dataset_depth(const char *path) /* * Keep track of nesting until you hit the end of the - * path or found the snapshot/bookmark seperator. + * path or found the snapshot/bookmark separator. */ for (int i = 0; loc[i] != '\0' && loc[i] != '@' && diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index ac1c42b3f07..edb4f60e6f2 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -156,7 +156,7 @@ zpool_name_to_prop(const char *propname) /* * Given a pool property ID, returns the corresponding name. - * Assuming the pool propety ID is valid. + * Assuming the pool property ID is valid. 
*/ const char * zpool_prop_to_name(zpool_prop_t prop) From 8e1f209fa1eb00f5e59cb61baa2a24811df32b2c Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 31 Aug 2019 01:52:00 +0200 Subject: [PATCH 199/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini --- .../tests/functional/redundancy/redundancy_001_pos.ksh | 2 +- .../zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh | 2 +- .../tests/functional/refreserv/refreserv_multi_raidz.ksh | 2 +- tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh | 2 +- .../tests/functional/reservation/reservation_001_pos.ksh | 2 +- .../tests/functional/reservation/reservation_008_pos.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/rsend.kshlib | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh b/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh index e25a48be8df..b5557f1f7e4 100755 --- a/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh @@ -41,7 +41,7 @@ # 3. Fill the filesystem with directories and files. # 4. Record all the files and directories checksum information. # 5. Damaged one of the virtual disk file. -# 6. Verify the data is correct to prove raidz can withstand 1 devicd is +# 6. Verify the data is correct to prove raidz can withstand 1 device is # failing. # diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh index da36609f2c4..3e5a78cf944 100755 --- a/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh @@ -38,7 +38,7 @@ # space outside of this refreservation. # # STRATEGY: -# 1. Setting quota and refservation +# 1. Setting quota and refreservation # 2. 
Verify snapshot can be created, when used =< quota - refreserv # 3. Verify failed to create snapshot, when used > quota - refreserv # diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh index 803e391c9ce..c904a807f17 100755 --- a/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh @@ -125,7 +125,7 @@ done log_note "sizes=$(print -C sizes)" # -# Helper furnction for checking that refreservation is calculated properly in +# Helper function for checking that refreservation is calculated properly in # multi-vdev pools. "Properly" is defined as assuming that all vdevs are as # space inefficient as the worst one. # diff --git a/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh index cb8bd6b810c..e5d8261e80b 100755 --- a/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh +++ b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh @@ -64,7 +64,7 @@ log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK # # Normally, we expect nopwrites to avoid allocating new blocks, but # after a device has been removed the DVAs will get remapped when -# a L0's indirect bloock is written. This will negate the effects +# a L0's indirect block is written. This will negate the effects # of nopwrite and should result in new allocations. 
# diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh b/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh index b72b8e4a388..b8220791f1d 100755 --- a/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh @@ -115,7 +115,7 @@ for obj in $TESTPOOL/$TESTFS $OBJ_LIST; do # # Due to the way space is consumed and released by metadata we - # can't do an exact check here, but we do do a basic sanity + # can't do an exact check here, but we do a basic sanity # check. # log_must within_limits $space_avail $new_space_avail $RESV_TOLERANCE diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh b/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh index fbf4276e8bd..a0cd039b183 100755 --- a/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh @@ -85,7 +85,7 @@ resv_size_set=`expr $resv_space_avail / $num_resv_fs` # # We set the reservations now, rather than when we created the filesystems -# to allow us to take into account space used by the filsystem metadata +# to allow us to take into account space used by the filesystem metadata # # Note we don't set a reservation on the first filesystem we created, # hence num=1 rather than zero below. 
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 8737ae55abf..f51786083f3 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -343,7 +343,7 @@ function getds_with_suffix } # -# Output inherited properties whitch is edited for file system +# Output inherited properties which is edited for file system # function fs_inherit_prop { From 7572926bc50d3b7ef5b63f87aa745224578cec22 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 31 Aug 2019 01:53:48 +0200 Subject: [PATCH 200/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9248 --- .../tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh | 4 ++-- .../cli_root/zfs_rollback/zfs_rollback_common.kshlib | 2 +- .../tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh | 2 +- .../tests/functional/cli_root/zfs_set/cache_002_neg.ksh | 2 +- .../tests/functional/cli_root/zfs_set/canmount_002_pos.ksh | 2 +- .../tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh | 2 +- .../tests/functional/cli_root/zfs_set/zfs_set_common.kshlib | 4 ++-- .../functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh | 2 +- .../functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh | 2 +- .../functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh | 4 ++-- .../functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh | 4 ++-- .../functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh | 2 +- .../functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh | 2 +- .../functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh | 2 +- .../tests/functional/cli_root/zpool/zpool_001_neg.ksh | 2 +- 15 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh 
b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh index 2042b37a98f..e2e2c5f010f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -31,11 +31,11 @@ # 4. Attempt to receive a raw send stream as a child of an unencrypted dataset # 5. Verify the key is unavailable # 6. Attempt to load the key and mount the dataset -# 7. Verify the cheksum of the file is the same as the original +# 7. Verify the checksum of the file is the same as the original # 8. Attempt to receive a raw send stream as a child of an encrypted dataset # 9. Verify the key is unavailable # 10. Attempt to load the key and mount the dataset -# 11. Verify the cheksum of the file is the same as the original +# 11. Verify the checksum of the file is the same as the original # verify_runnable "both" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib index 5b157d11c15..f69ec300ca9 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib @@ -147,7 +147,7 @@ function setup_clone_env } # -# Clean up the test environmnet +# Clean up the test environment # # $1 number of snapshot Note: Currently only support three snapshots. # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh index da14fa2fa62..4a9d29fce1c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh @@ -65,7 +65,7 @@ snap2=$fs@snap2 snap3=$fs@snap3 set -A badargs \ - "" "$TESTPOOL" "$TESTFS" "$fs" "$fs@nonexisten_snap" "?" 
\ + "" "$TESTPOOL" "$TESTFS" "$fs" "$fs@nonexistent_snap" "?" \ "$snap1/blah" "$snap1@blah" "-i" "-x" "-i $fs" \ "-x $snap1 $snap2" "-i $snap1" \ "-i $snap2 $snap1" "$snap1 $snap2" "-i $snap1 $snap2 $snap3" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh index 5fbc8bf7165..caad211bcf6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh @@ -64,4 +64,4 @@ do done done -log_pass "Setting invalid {primary|secondary}cache on fs or volume fail as expeced." +log_pass "Setting invalid {primary|secondary}cache on fs or volume fail as expected." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh index 7cbcf7903e3..3b8b88e3631 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh @@ -40,7 +40,7 @@ # # STRATEGY: # 1. Setup a pool and create fs, volume, snapshot clone within it. -# 2. Set canmount=noauto for each dataset and check the retuen value +# 2. Set canmount=noauto for each dataset and check the return value # and check if it still can be mounted by mount -a. # 3. mount each dataset(except volume) to see if it can be mounted. 
# diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh index ad33e18fbb2..48580cafdb3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh @@ -34,7 +34,7 @@ # # DESCRIPTION: -# If ZFS is currently managing the file system but it is currently unmoutned, +# If ZFS is currently managing the file system but it is currently unmounted, # and the mountpoint property is changed, the file system remains unmounted. # # STRATEGY: diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib index 084a4a0a82a..5e9f719dfcf 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib @@ -156,7 +156,7 @@ function random_string } # -# Get vaild user defined property name +# Get valid user defined property name # # $1 user defined property name length # @@ -189,7 +189,7 @@ function valid_user_property } # -# Get invaild user defined property name +# Get invalid user defined property name # # $1 user defined property name length # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh index 2efcf1cceb7..5d8b6e2750f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh @@ -82,7 +82,7 @@ while (( i < ${#args[*]} )); do ((i = i + 1)) done -# Testing the invalid senario: the child volume already has an +# Testing the invalid scenario: the child volume already has an # identical name snapshot, zfs snapshot -r should fail when # creating 
snapshot with -r for the parent log_must zfs destroy $TESTPOOL/$TESTCTR/$TESTFS1@$TESTSNAP diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh index 37791001327..627910abd6e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh @@ -34,7 +34,7 @@ # STRATEGY: # 1. Create 2 separate zpools, zpool name lengths must be the same. # 2. Attempt to simultaneously create a snapshot of each pool. -# 3. Veriy the snapshot creation failed. +# 3. Verify the snapshot creation failed. # verify_runnable "both" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh index 4cd98af0c69..f0682b816ae 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh @@ -22,7 +22,7 @@ # 1. Create multiple datasets # 2. Create multiple snapshots with a list of valid and invalid # snapshot names -# 3. Verify the valid snpashot creation +# 3. Verify the valid snapshot creation . 
$STF_SUITE/include/libtest.shlib @@ -86,7 +86,7 @@ for i in 1 2 3; do txg_tag=$(echo "$txg_group" | nawk -v j=$i 'FNR == j {print}') [[ $txg_tag != $(echo "$txg_group" | \ nawk -v j=$i 'FNR == j {print}') ]] \ - && log_fail "snapshots belong to differnt transaction groups" + && log_fail "snapshots belong to different transaction groups" done log_note "verify snapshot contents" for ds in $datasets; do diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh index 0ed14a99fc2..3575875c276 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh @@ -83,7 +83,7 @@ function restore_dataset } -log_assert "zfs fource unmount and destroy in snapshot directory will not cause error." +log_assert "zfs force unmount and destroy in snapshot directory will not cause error." log_onexit cleanup for fs in $TESTPOOL/$TESTFS $TESTPOOL ; do @@ -139,4 +139,4 @@ log_must eval zpool list > /dev/null 2>&1 log_must eval zpool status > /dev/null 2>&1 zpool iostat > /dev/null 2>&1 -log_pass "zfs fource unmount and destroy in snapshot directory will not cause error." +log_pass "zfs force unmount and destroy in snapshot directory will not cause error." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh index 7bb1cd4a37c..ca625bd2278 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh @@ -140,7 +140,7 @@ while (( i < ${#mntp_fs[*]} )); do ((i = i + 2)) done -log_note "Verify 'zfs unshare -a' succeds as root." +log_note "Verify 'zfs unshare -a' succeeds as root." 
i=0 typeset sharenfs_val diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh index e92581c7c9b..fd916040b1b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh @@ -46,7 +46,7 @@ verify_runnable "global" export NONEXISTFSNAME="nonexistfs50charslong_0123456789012345678901234567" export NONEXISTMOUNTPOINT="/nonexistmountpoint_0123456789" -set -A opts "" "$TESTPOOL/$NONEXISTFSNAME" "$NONEEXISTMOUNTPOINT" "-?" "-1" \ +set -A opts "" "$TESTPOOL/$NONEXISTFSNAME" "$NONEXISTMOUNTPOINT" "-?" "-1" \ "-a blah" "$TESTPOOL/$TESTFS $TESTPOOL/$TESTFS1" \ "-f $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS1" \ "$TESTPOOL/$TESTFS $TESTDIR" "-f $TESTPOOL/$TESTFS $TESTDIR" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh index e37b4f81abf..d3ed4a736cc 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh @@ -133,7 +133,7 @@ COUNT=$( wc -l $output | awk '{print $1}' ) if (( COUNT != OLDCOUNT )); then cat $output - log_fail "Unexpect old-version filesystems print out." + log_fail "Unexpected old-version filesystems print out." fi log_pass "Executing 'zfs upgrade' command succeeds." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh index a3158bd5781..25decd78863 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh @@ -37,7 +37,7 @@ # return an error. # # STRATEGY: -# 1. Create an array containg each zpool sub-command name. +# 1. 
Create an array containing each zpool sub-command name. # 2. For each element, execute the sub-command. # 3. Verify it returns an error. # From 6673ef3f6fa3cd5cdeae1411aa35d2c596b5d8c3 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 02:53:27 +0200 Subject: [PATCH 201/325] Fix typos in lib/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9237 --- lib/libefi/rdwr_efi.c | 6 +++--- lib/libshare/smb.c | 2 +- lib/libspl/asm-generic/atomic.c | 2 +- lib/libspl/include/atomic.h | 4 ++-- lib/libspl/include/sys/kstat.h | 2 +- lib/libspl/include/sys/param.h | 2 +- lib/libspl/include/sys/uio.h | 2 +- lib/libspl/include/sys/vtoc.h | 2 +- lib/libspl/mkdirp.c | 2 +- lib/libtpool/thread_pool.c | 2 +- lib/libzfs/THIRDPARTYLICENSE.openssl | 2 +- lib/libzfs/libzfs_crypto.c | 4 ++-- lib/libzfs/libzfs_dataset.c | 4 ++-- lib/libzfs/libzfs_pool.c | 6 +++--- lib/libzfs/libzfs_sendrecv.c | 2 +- lib/libzutil/zutil_import.c | 6 +++--- 16 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 93c79277dae..5311059ee81 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -224,7 +224,7 @@ efi_get_info(int fd, struct dk_cinfo *dki_info) /* * The simplest way to get the partition number under linux is - * to parse it out of the /dev/ block device name. + * to parse it out of the /dev/ block device name. * The kernel creates this using the partition number when it * populates /dev/ so it may be trusted. The tricky bit here is * that the naming convention is based on the block device type. @@ -1198,7 +1198,7 @@ efi_use_whole_disk(int fd) * Verify that we've found the reserved partition by checking * that it looks the way it did when we created it in zpool_label_disk. * If we've found the incorrect partition, then we know that this - * device was reformatted and no longer is soley used by ZFS. 
+ * device was reformatted and no longer is solely used by ZFS. */ if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || @@ -1284,7 +1284,7 @@ efi_write(int fd, struct dk_gpt *vtoc) if ((rval = efi_get_info(fd, &dki_info)) != 0) return (rval); - /* check if we are dealing wih a metadevice */ + /* check if we are dealing with a metadevice */ if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && (strncmp(dki_info.dki_dname, "md", 3) == 0)) { md_flag = 1; diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c index 4c2045dfdb4..a95607ee032 100644 --- a/lib/libshare/smb.c +++ b/lib/libshare/smb.c @@ -29,7 +29,7 @@ * * TESTING * Make sure that samba listens to 'localhost' (127.0.0.1) and that the options - * 'usershare max shares' and 'usershare owner only' have been rewied/set + * 'usershare max shares' and 'usershare owner only' have been reviewed/set * accordingly (see zfs(8) for information). * * Once configuration in samba have been done, test that this diff --git a/lib/libspl/asm-generic/atomic.c b/lib/libspl/asm-generic/atomic.c index d0023b18281..03f8ddcfa8f 100644 --- a/lib/libspl/asm-generic/atomic.c +++ b/lib/libspl/asm-generic/atomic.c @@ -37,7 +37,7 @@ pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER; /* - * Theses are the void returning variants + * These are the void returning variants */ /* BEGIN CSTYLED */ #define ATOMIC_INC(name, type) \ diff --git a/lib/libspl/include/atomic.h b/lib/libspl/include/atomic.h index 7072a11bdb1..f8c257f9696 100644 --- a/lib/libspl/include/atomic.h +++ b/lib/libspl/include/atomic.h @@ -79,7 +79,7 @@ extern void atomic_add_64(volatile uint64_t *, int64_t); #endif /* - * Substract delta from target + * Subtract delta from target */ extern void atomic_sub_8(volatile uint8_t *, int8_t); extern void atomic_sub_char(volatile uchar_t *, signed char); @@ -173,7 +173,7 @@ extern uint64_t atomic_add_64_nv(volatile uint64_t *, int64_t); #endif /* - * 
Substract delta from target + * Subtract delta from target */ extern uint8_t atomic_sub_8_nv(volatile uint8_t *, int8_t); extern uchar_t atomic_sub_char_nv(volatile uchar_t *, signed char); diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h index 9bd0d949d54..69fb6d401fc 100644 --- a/lib/libspl/include/sys/kstat.h +++ b/lib/libspl/include/sys/kstat.h @@ -82,7 +82,7 @@ typedef struct kstat { void *ks_data; /* kstat type-specific data */ uint_t ks_ndata; /* # of type-specific data records */ size_t ks_data_size; /* total size of kstat data section */ - hrtime_t ks_snaptime; /* time of last data shapshot */ + hrtime_t ks_snaptime; /* time of last data snapshot */ /* * Fields relevant to kernel only */ diff --git a/lib/libspl/include/sys/param.h b/lib/libspl/include/sys/param.h index c22d508f9b0..26335187fdc 100644 --- a/lib/libspl/include/sys/param.h +++ b/lib/libspl/include/sys/param.h @@ -37,7 +37,7 @@ * with smaller units (fragments) only in the last direct block. * MAXBSIZE primarily determines the size of buffers in the buffer * pool. It may be made larger without any effect on existing - * file systems; however making it smaller make make some file + * file systems; however making it smaller may make some file * systems unmountable. 
* * Note that the blocked devices are assumed to have DEV_BSIZE diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h index 97e8412ef70..91ee3b3fd00 100644 --- a/lib/libspl/include/sys/uio.h +++ b/lib/libspl/include/sys/uio.h @@ -75,7 +75,7 @@ typedef enum xuio_type { typedef struct uioa_page_s { /* locked uio_iov state */ int uioa_pfncnt; /* count of pfn_t(s) in *uioa_ppp */ - void **uioa_ppp; /* page_t or pfn_t arrary */ + void **uioa_ppp; /* page_t or pfn_t array */ caddr_t uioa_base; /* address base */ size_t uioa_len; /* span length */ } uioa_page_t; diff --git a/lib/libspl/include/sys/vtoc.h b/lib/libspl/include/sys/vtoc.h index 22a652b74bf..5d8448b628d 100644 --- a/lib/libspl/include/sys/vtoc.h +++ b/lib/libspl/include/sys/vtoc.h @@ -51,7 +51,7 @@ extern "C" { * v_sanity returned as VTOC_SANE * if Disk Label was sane * v_sectorsz returned as 512 - * v_reserved [all] retunred as zero + * v_reserved [all] returned as zero * timestamp [all] returned as zero * * See dklabel.h, read_vtoc(), and write_vtoc(). diff --git a/lib/libspl/mkdirp.c b/lib/libspl/mkdirp.c index 54174175200..fce2c1c82eb 100644 --- a/lib/libspl/mkdirp.c +++ b/lib/libspl/mkdirp.c @@ -128,7 +128,7 @@ mkdirp(const char *d, mode_t mode) * caller, or NULL is returned on error. * * The caller should handle error reporting based upon the - * returned vlaue, and should free the returned value, + * returned value, and should free the returned value, * when appropriate. */ diff --git a/lib/libtpool/thread_pool.c b/lib/libtpool/thread_pool.c index a43fdd9cd60..267fa834bd7 100644 --- a/lib/libtpool/thread_pool.c +++ b/lib/libtpool/thread_pool.c @@ -134,7 +134,7 @@ tpool_worker(void *arg) /* * This is the worker's main loop. - * It will only be left if a timeout or an error has occured. + * It will only be left if a timeout or an error has occurred. 
*/ active.tpa_tid = pthread_self(); for (;;) { diff --git a/lib/libzfs/THIRDPARTYLICENSE.openssl b/lib/libzfs/THIRDPARTYLICENSE.openssl index a2c4adcbe6a..92c9e196a31 100644 --- a/lib/libzfs/THIRDPARTYLICENSE.openssl +++ b/lib/libzfs/THIRDPARTYLICENSE.openssl @@ -101,7 +101,7 @@ * must display the following acknowledgement: * "This product includes cryptographic software written by * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library + * The word 'cryptographic' can be left out if the routines from the library * being used are not cryptographic related :-). * 4. If you include any Windows specific code (or a derivative thereof) from * the apps directory (application code) you must include an acknowledgement: diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c index d31f43b1fdf..b7b567ef53c 100644 --- a/lib/libzfs/libzfs_crypto.c +++ b/lib/libzfs/libzfs_crypto.c @@ -242,7 +242,7 @@ get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat, out: if (isatty(fileno(fd))) { - /* reset the teminal */ + /* reset the terminal */ (void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term); (void) sigaction(SIGINT, &osigint, NULL); (void) sigaction(SIGTSTP, &osigtstp, NULL); @@ -1321,7 +1321,7 @@ zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey) if (is_encroot) { /* - * If this is already an ecryption root, just keep + * If this is already an encryption root, just keep * any properties not set by the user. 
*/ if (keyformat == ZFS_KEYFORMAT_NONE) { diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index a35855d82fd..cc2f61a0d7d 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1452,7 +1452,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, * There was an error in parsing so * deal with it by issuing an error * message and leaving after - * uninitializing the the libshare + * uninitializing the libshare * interface. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -1657,7 +1657,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) /* * Helper for 'zfs {set|clone} refreservation=auto'. Must be called after - * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinal value. + * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinel value. * Return codes must match zfs_add_synthetic_resv(). */ static int diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index a6e26ebcd4d..29e6f0fd23d 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -1531,7 +1531,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) case EOVERFLOW: /* - * This occurrs when one of the devices is below + * This occurs when one of the devices is below * SPA_MINDEVSIZE. Unfortunately, we can't detect which * device was the problem device since there's no * reliable way to determine device size from userland. @@ -4154,7 +4154,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) /* * Sort the resulting bookmarks. This is a little confusing due to the * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last - * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks + * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks * _not_ copied as part of the process. So we point the start of our * array appropriate and decrement the total number of elements. 
*/ @@ -4782,7 +4782,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) if (rval) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " "EFI label on '%s' is damaged. Ensure\nthis device " - "is not in in use, and is functioning properly: %d"), + "is not in use, and is functioning properly: %d"), path, rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index d967e043b4e..12a4b500ed1 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -4100,7 +4100,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, /* * Raw sends can not be performed as an incremental on top - * of existing unencryppted datasets. zfs recv -F cant be + * of existing unencrypted datasets. zfs recv -F can't be * used to blow away an existing encrypted filesystem. This * is because it would require the dsl dir to point to the * new key (or lack of a key) and the old key at the same diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index e82744383dc..28733cc747c 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1793,7 +1793,7 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, char *dpath, *name; /* - * Seperate the directory part and last part of the + * Separate the directory part and last part of the * path. We do this so that we can get the realpath of * the directory. We don't get the realpath on the * whole path because if it's a symlink, we want the @@ -2080,8 +2080,8 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg) tpool_destroy(t); /* - * Process the cache filtering out any entries which are not - * for the specificed pool then adding matching label configs. + * Process the cache, filtering out any entries which are not + * for the specified pool then adding matching label configs. 
*/ cookie = NULL; while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) { From 5097eb6ac9f5e63d6307f0375df5042444789b13 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 02:56:41 +0200 Subject: [PATCH 202/325] Fix typos in module/zfs/ Reviewed-by: Matt Ahrens Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9240 --- module/zfs/arc.c | 18 +++++++++--------- module/zfs/dbuf.c | 8 ++++---- module/zfs/dmu.c | 6 +++--- module/zfs/dmu_objset.c | 4 ++-- module/zfs/dmu_zfetch.c | 2 +- module/zfs/dnode.c | 2 +- module/zfs/dsl_bookmark.c | 2 +- module/zfs/dsl_crypt.c | 6 +++--- module/zfs/dsl_dataset.c | 8 ++++---- module/zfs/dsl_destroy.c | 2 +- module/zfs/dsl_dir.c | 4 ++-- module/zfs/dsl_scan.c | 6 +++--- module/zfs/dsl_synctask.c | 2 +- module/zfs/dsl_userhold.c | 6 +++--- module/zfs/fm.c | 4 ++-- module/zfs/mmp.c | 4 ++-- module/zfs/policy.c | 2 +- module/zfs/qat.h | 6 +++--- module/zfs/sa.c | 4 ++-- module/zfs/spa.c | 12 ++++++------ module/zfs/spa_checkpoint.c | 2 +- module/zfs/spa_errlog.c | 2 +- module/zfs/spa_history.c | 2 +- module/zfs/txg.c | 6 +++--- module/zfs/vdev.c | 6 +++--- module/zfs/vdev_cache.c | 2 +- module/zfs/vdev_initialize.c | 2 +- module/zfs/vdev_mirror.c | 2 +- module/zfs/vdev_queue.c | 2 +- module/zfs/vdev_raidz.c | 6 +++--- .../zfs/vdev_raidz_math_aarch64_neon_common.h | 2 +- module/zfs/zcp.c | 6 +++--- module/zfs/zcp_get.c | 4 ++-- module/zfs/zcp_iter.c | 2 +- module/zfs/zfs_acl.c | 2 +- module/zfs/zfs_byteswap.c | 4 ++-- module/zfs/zfs_ctldir.c | 2 +- module/zfs/zfs_dir.c | 4 ++-- module/zfs/zfs_ioctl.c | 4 ++-- module/zfs/zfs_vfsops.c | 4 ++-- module/zfs/zfs_vnops.c | 6 +++--- module/zfs/zfs_znode.c | 4 ++-- module/zfs/zil.c | 4 ++-- module/zfs/zio_checksum.c | 2 +- module/zfs/zio_compress.c | 2 +- module/zfs/zio_crypt.c | 4 ++-- module/zfs/zio_inject.c | 2 +- module/zfs/zpl_super.c | 2 +- module/zfs/zvol.c | 2 +- 49 files changed, 101 
insertions(+), 101 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index dd382b065a4..e16b44ca82e 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -62,7 +62,7 @@ * elements of the cache are therefore exactly the same size. So * when adjusting the cache size following a cache miss, its simply * a matter of choosing a single page to evict. In our model, we - * have variable sized cache blocks (rangeing from 512 bytes to + * have variable sized cache blocks (ranging from 512 bytes to * 128K bytes). We therefore choose a set of blocks to evict to make * space for a cache miss that approximates as closely as possible * the space used by the new block. @@ -262,7 +262,7 @@ * The L1ARC has a slightly different system for storing encrypted data. * Raw (encrypted + possibly compressed) data has a few subtle differences from * data that is just compressed. The biggest difference is that it is not - * possible to decrypt encrypted data (or visa versa) if the keys aren't loaded. + * possible to decrypt encrypted data (or vice-versa) if the keys aren't loaded. * The other difference is that encryption cannot be treated as a suggestion. * If a caller would prefer compressed data, but they actually wind up with * uncompressed data the worst thing that could happen is there might be a @@ -2152,7 +2152,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, } /* - * Adjust encrypted and authenticated headers to accomodate + * Adjust encrypted and authenticated headers to accommodate * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are * allowed to fail decryption due to keys not being loaded * without being marked as an IO error. 
@@ -2221,7 +2221,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, if (arc_buf_is_shared(buf)) { ASSERT(ARC_BUF_COMPRESSED(buf)); - /* We need to give the buf it's own b_data */ + /* We need to give the buf its own b_data */ buf->b_flags &= ~ARC_BUF_FLAG_SHARED; buf->b_data = arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf); @@ -2837,7 +2837,7 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * sufficient to make this guarantee, however it's possible * (specifically in the rare L2ARC write race mentioned in * arc_buf_alloc_impl()) there will be an existing uncompressed buf that - * is sharable, but wasn't at the time of its allocation. Rather than + * is shareable, but wasn't at the time of its allocation. Rather than * allow a new shared uncompressed buf to be created and then shuffle * the list around to make it the last element, this simply disallows * sharing if the new buf isn't the first to be added. @@ -2896,7 +2896,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb, /* * Only honor requests for compressed bufs if the hdr is actually - * compressed. This must be overriden if the buffer is encrypted since + * compressed. This must be overridden if the buffer is encrypted since * encrypted buffers cannot be decompressed. */ if (encrypted) { @@ -3200,7 +3200,7 @@ arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf) } /* - * Free up buf->b_data and pull the arc_buf_t off of the the arc_buf_hdr_t's + * Free up buf->b_data and pull the arc_buf_t off of the arc_buf_hdr_t's * list and free it. */ static void @@ -3659,7 +3659,7 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) /* * This function is used by the send / receive code to convert a newly * allocated arc_buf_t to one that is suitable for a raw encrypted write. 
It - * is also used to allow the root objset block to be uupdated without altering + * is also used to allow the root objset block to be updated without altering * its embedded MACs. Both block types will always be uncompressed so we do not * have to worry about compression type or psize. */ @@ -6188,7 +6188,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, /* * Determine if we have an L1 cache hit or a cache miss. For simplicity - * we maintain encrypted data seperately from compressed / uncompressed + * we maintain encrypted data separately from compressed / uncompressed * data. If the user is requesting raw encrypted data and we don't have * that in the header we will read from disk to guarantee that we can * get it even if the encryption keys aren't loaded. diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 2bc995ac66c..0542ba7aeb2 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2275,7 +2275,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) ASSERT(!zfs_refcount_is_zero(&db->db_holds)); /* - * Quick check for dirtyness. For already dirty blocks, this + * Quick check for dirtiness. For already dirty blocks, this * reduces runtime of this function by >90%, and overall performance * by 50% for some workloads (e.g. file deletion with indirect blocks * cached). @@ -2791,7 +2791,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, * Hold the dn_dbufs_mtx while we get the new dbuf * in the hash table *and* added to the dbufs list. * This prevents a possible deadlock with someone - * trying to look up this dbuf before its added to the + * trying to look up this dbuf before it's added to the * dn_dbufs list. 
*/ mutex_enter(&dn->dn_dbufs_mtx); @@ -3185,7 +3185,7 @@ dbuf_hold_impl_arg(struct dbuf_hold_arg *dh) ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf)); /* - * If this buffer is currently syncing out, and we are are + * If this buffer is currently syncing out, and we are * still referencing it from db_data, we need to make a copy * of it in case we decide we want to dirty it again in this txg. */ @@ -3666,7 +3666,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) /* * This buffer was allocated at a time when there was * no available blkptrs from the dnode, or it was - * inappropriate to hook it in (i.e., nlevels mis-match). + * inappropriate to hook it in (i.e., nlevels mismatch). */ ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); ASSERT(db->db_parent == NULL); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index a086f5ca6d9..f972545d30b 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -646,11 +646,11 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) /* * Issue prefetch i/os for the given blocks. If level is greater than 0, the - * indirect blocks prefeteched will be those that point to the blocks containing + * indirect blocks prefetched will be those that point to the blocks containing * the data starting at offset, and continuing to offset + len. * * Note that if the indirect blocks above the blocks being prefetched are not - * in cache, they will be asychronously read in. + * in cache, they will be asynchronously read in. */ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, @@ -2301,7 +2301,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) * Determine dedup setting. If we are in dmu_sync(), * we won't actually dedup now because that's all * done in syncing context; but we do want to use the - * dedup checkum. If the checksum is not strong + * dedup checksum. 
If the checksum is not strong * enough to ensure unique signatures, force * dedup_verify. */ diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 88e97e1a310..c78019d05ac 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1027,7 +1027,7 @@ dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* * We don't want to have to increase the meta-dnode's nlevels - * later, because then we could do it in quescing context while + * later, because then we could do it in quiescing context while * we are also accessing it in open context. * * This precaution is not necessary for the MOS (ds == NULL), @@ -2742,7 +2742,7 @@ dmu_objset_find_dp_cb(void *arg) /* * We need to get a pool_config_lock here, as there are several - * asssert(pool_config_held) down the stack. Getting a lock via + * assert(pool_config_held) down the stack. Getting a lock via * dsl_pool_config_enter is risky, as it might be stalled by a * pending writer. This would deadlock, as the write lock can * only be granted when our parent thread gives up the lock. diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index 364e4d7aa86..46dc4627cf3 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -223,7 +223,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) * can only read from blocks that we carefully ensure are on * concrete vdevs (or previously-loaded indirect vdevs). So we * can't allow the predictive prefetcher to attempt reads of other - * blocks (e.g. of the MOS's dnode obejct). + * blocks (e.g. of the MOS's dnode object). 
*/ if (!spa_indirect_vdevs_loaded(spa)) return; diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 097eaf3ee6f..ec297a242b2 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1787,7 +1787,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) dn->dn_indblkshift = ibs; dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; } - /* rele after we have fixed the blocksize in the dnode */ + /* release after we have fixed the blocksize in the dnode */ if (db) dbuf_rele(db, FTAG); diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index a32198402f4..01362e0ad28 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -84,7 +84,7 @@ dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname, } /* - * If later_ds is non-NULL, this will return EXDEV if the the specified bookmark + * If later_ds is non-NULL, this will return EXDEV if the specified bookmark * does not represents an earlier point in later_ds's timeline. * * Returns ENOENT if the dataset containing the bookmark does not exist. diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 24711227ba5..271019e7902 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -227,7 +227,7 @@ dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, goto error; } - /* if the user asked for the deault crypt, determine that now */ + /* if the user asked for the default crypt, determine that now */ if (dcp->cp_crypt == ZIO_CRYPT_ON) dcp->cp_crypt = ZIO_CRYPT_ON_VALUE; @@ -1596,7 +1596,7 @@ spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp) /* * Perform the actual work in syncing context. The blocks modified * here could be calculated but it would require holding the pool - * lock and tarversing all of the datasets that will have their keys + * lock and traversing all of the datasets that will have their keys * changed. 
*/ return (dsl_sync_task(dsname, spa_keystore_change_key_check, @@ -1714,7 +1714,7 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, return; /* - * If the target is being promoted to the encyrption root update the + * If the target is being promoted to the encryption root update the * DSL Crypto Key and keylocation to reflect that. We also need to * update the DSL Crypto Keys of all children inheritting their * encryption root to point to the new target. Otherwise, the check diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 966c2cc93d1..33b8cafbfb6 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -363,7 +363,7 @@ load_zfeature(objset_t *mos, dsl_dataset_t *ds, spa_feature_t f) } /* - * We have to release the fsid syncronously or we risk that a subsequent + * We have to release the fsid synchronously or we risk that a subsequent * mount of the same dataset will fail to unique_insert the fsid. This * failure would manifest itself as the fsid of this dataset changing * between mounts which makes NFS clients quite unhappy. @@ -2076,7 +2076,7 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) * We use nvlist_alloc() instead of fnvlist_alloc() because the * latter would allocate the list with NV_UNIQUE_NAME flag. * As a result, every time a clone name is appended to the list - * it would be (linearly) searched for for a duplicate name. + * it would be (linearly) searched for a duplicate name. * We already know that all clone names must be unique and we * want avoid the quadratic complexity of double-checking that * because we can have a large number of clones. 
@@ -2404,7 +2404,7 @@ dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, int error; dsl_pool_t *dp = ds->ds_dir->dd_pool; - /* Retrieve the mountpoint value stored in the zap opbject */ + /* Retrieve the mountpoint value stored in the zap object */ error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1, ZAP_MAXVALUELEN, value, source); if (error != 0) { @@ -3635,7 +3635,7 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, * The clone can't be too much over the head's refquota. * * To ensure that the entire refquota can be used, we allow one - * transaction to exceed the the refquota. Therefore, this check + * transaction to exceed the refquota. Therefore, this check * needs to also allow for the space referenced to be more than the * refquota. The maximum amount of space that one transaction can use * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index a01abfa0038..ede54d9092d 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -632,7 +632,7 @@ dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, /* * lzc_destroy_snaps() is documented to fill the errlist with - * int32 values, so we need to covert the int64 values that are + * int32 values, so we need to convert the int64 values that are * returned from LUA. */ int rv = 0; diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 6fb711f592c..724f80ff3f5 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -96,7 +96,7 @@ * limit set. If there is a limit at any initialized level up the tree, the * check must pass or the creation will fail. Likewise, when a filesystem or * snapshot is destroyed, the counts are recursively adjusted all the way up - * the initizized nodes in the tree. Renaming a filesystem into different point + * the initialized nodes in the tree. 
Renaming a filesystem into different point * in the tree will first validate, then update the counts on each branch up to * the common ancestor. A receive will also validate the counts and then update * them. @@ -1495,7 +1495,7 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) * less than the amount specified. * * NOTE: The behavior of this function is identical to the Illumos / FreeBSD - * version however it has been adjusted to use an iterative rather then + * version however it has been adjusted to use an iterative rather than * recursive algorithm to minimize stack usage. */ void diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 202c6e8d8f3..c37e77be44a 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1911,7 +1911,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, /* * This debugging is commented out to conserve stack space. This - * function is called recursively and the debugging addes several + * function is called recursively and the debugging adds several * bytes to the stack for each call. It can be commented back in * if required to debug an issue in dsl_scan_visitbp(). * @@ -3391,7 +3391,7 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx) /* * This is the primary entry point for scans that is called from syncing * context. Scans must happen entirely during syncing context so that we - * cna guarantee that blocks we are currently scanning will not change out + * can guarantee that blocks we are currently scanning will not change out * from under us. While a scan is active, this function controls how quickly * transaction groups proceed, instead of the normal handling provided by * txg_sync_thread(). @@ -3995,7 +3995,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, * As can be seen, at fill_ratio=3, the algorithm is slightly biased towards * extents that are more completely filled (in a 3:2 ratio) vs just larger. 
* Note that as an optimization, we replace multiplication and division by - * 100 with bitshifting by 7 (which effecitvely multiplies and divides by 128). + * 100 with bitshifting by 7 (which effectively multiplies and divides by 128). */ static int ext_size_compare(const void *x, const void *y) diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index b225eed37d4..2d6ca8549eb 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -143,7 +143,7 @@ dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, * For that reason, early synctasks can affect the process of writing dirty * changes to disk for the txg that they run and should be used with caution. * In addition, early synctasks should not dirty any metaslabs as this would - * invalidate the precodition/invariant for subsequent early synctasks. + * invalidate the precondition/invariant for subsequent early synctasks. * [see dsl_pool_sync() and dsl_early_sync_task_verify()] */ int diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index 638805d0b92..2b2182fadec 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -302,7 +302,7 @@ dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) * holds is nvl of snapname -> holdname * errlist will be filled in with snapname -> error * - * The snaphosts must all be in the same pool. + * The snapshots must all be in the same pool. * * Holds for snapshots that don't exist will be skipped. * @@ -556,9 +556,9 @@ dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) * errlist will be filled in with snapname -> error * * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots, - * otherwise they should be the names of shapshots. + * otherwise they should be the names of snapshots. * - * As a release may cause snapshots to be destroyed this trys to ensure they + * As a release may cause snapshots to be destroyed this tries to ensure they * aren't mounted. 
* * The release of non-existent holds are skipped. diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 0a0fc79bd37..98a844820b3 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -31,7 +31,7 @@ * Name-Value Pair Lists * * The embodiment of an FMA protocol element (event, fmri or authority) is a - * name-value pair list (nvlist_t). FMA-specific nvlist construtor and + * name-value pair list (nvlist_t). FMA-specific nvlist constructor and * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used * to create an nvpair list using custom allocators. Callers may choose to * allocate either from the kernel memory allocator, or from a preallocated @@ -784,7 +784,7 @@ zfs_zevent_destroy(zfs_zevent_t *ze) #endif /* _KERNEL */ /* - * Wrapppers for FM nvlist allocators + * Wrappers for FM nvlist allocators */ /* ARGSUSED */ static void * diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index 1ffd862da12..810d20fdd95 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -87,12 +87,12 @@ * * In this case, a weak guarantee is provided. Since the host which last had * the pool imported will suspend the pool if no mmp writes land within - * fail_intervals * multihost_interval ms, the absense of writes during that + * fail_intervals * multihost_interval ms, the absence of writes during that * time means either the pool is not imported, or it is imported but the pool * is suspended and no further writes will occur. * * Note that resuming the suspended pool on the remote host would invalidate - * this gurantee, and so it is not allowed. + * this guarantee, and so it is not allowed. 
* * The factor of 2 provides a conservative safety factor and derives from * MMP_IMPORT_SAFETY_FACTOR; diff --git a/module/zfs/policy.c b/module/zfs/policy.c index a723235d301..7f9456a670e 100644 --- a/module/zfs/policy.c +++ b/module/zfs/policy.c @@ -70,7 +70,7 @@ static int priv_policy_user(const cred_t *cr, int capability, boolean_t all, int err) { /* - * All priv_policy_user checks are preceeded by kuid/kgid_has_mapping() + * All priv_policy_user checks are preceded by kuid/kgid_has_mapping() * checks. If we cannot do them, we shouldn't be using ns_capable() * since we don't know whether the affected files are valid in our * namespace. Note that kuid_has_mapping() came after cred->user_ns, so diff --git a/module/zfs/qat.h b/module/zfs/qat.h index 5c1cd15d09d..9ae8eb17357 100644 --- a/module/zfs/qat.h +++ b/module/zfs/qat.h @@ -80,7 +80,7 @@ typedef struct qat_stats { * Number of fails in the QAT compression / decompression engine. * Note: when a QAT error happens, it doesn't necessarily indicate a * critical hardware issue. Sometimes it is because the output buffer - * is not big enough. The compression job will be transfered to the + * is not big enough. The compression job will be transferred to the * gzip software implementation so the functionality of ZFS is not * impacted. */ @@ -113,7 +113,7 @@ typedef struct qat_stats { /* * Number of fails in the QAT encryption / decryption engine. * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. The encryption job will be transfered + * critical hardware issue. The encryption job will be transferred * to the software implementation so the functionality of ZFS is * not impacted. */ @@ -130,7 +130,7 @@ typedef struct qat_stats { /* * Number of fails in the QAT checksum engine. * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. The checksum job will be transfered to the + * critical hardware issue. 
The checksum job will be transferred to the * software implementation so the functionality of ZFS is not impacted. */ kstat_named_t cksum_fails; diff --git a/module/zfs/sa.c b/module/zfs/sa.c index f718e7662e6..621838396a4 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -83,7 +83,7 @@ * Layouts are simply an array of the attributes and their * ordering i.e. [0, 1, 4, 5, 2] * - * Each distinct layout is given a unique layout number and that is whats + * Each distinct layout is given a unique layout number and that is what's * stored in the header at the beginning of the SA data buffer. * * A layout only covers a single dbuf (bonus or spill). If a set of @@ -95,7 +95,7 @@ * Adding a single attribute will cause the entire set of attributes to * be rewritten and could result in a new layout number being constructed * as part of the rewrite if no such layout exists for the new set of - * attribues. The new attribute will be appended to the end of the already + * attributes. The new attribute will be appended to the end of the already * existing attributes. * * Both the attribute registration and attribute layout information are diff --git a/module/zfs/spa.c b/module/zfs/spa.c index a9efe254b6b..7b2a2081fd0 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -2113,7 +2113,7 @@ spa_load_verify_done(zio_t *zio) } /* - * Maximum number of inflight bytes is the log2 faction of the arc size. + * Maximum number of inflight bytes is the log2 fraction of the arc size. * By default, we set it to 1/16th of the arc. 
*/ int spa_load_verify_shift = 4; @@ -2566,7 +2566,7 @@ spa_activity_check_duration(spa_t *spa, uberblock_t *ub) } else if (MMP_VALID(ub)) { /* - * zfs-0.7 compatability case + * zfs-0.7 compatibility case */ import_delay = MAX(import_delay, (multihost_interval + @@ -3861,7 +3861,7 @@ spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg, need_update = B_TRUE; /* - * Update the config cache asychronously in case we're the + * Update the config cache asynchronously in case we're the * root pool, in which case the config cache isn't writable yet. */ if (need_update) @@ -4174,7 +4174,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) return (error); /* - * Redo the loading process process again with the + * Redo the loading process again with the * checkpointed uberblock. */ spa_ld_prepare_for_reload(spa); @@ -7923,7 +7923,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx) case ZPOOL_PROP_READONLY: case ZPOOL_PROP_CACHEFILE: /* - * 'readonly' and 'cachefile' are also non-persisitent + * 'readonly' and 'cachefile' are also non-persistent * properties. */ break; @@ -8734,7 +8734,7 @@ EXPORT_SYMBOL(spa_inject_delref); EXPORT_SYMBOL(spa_scan_stat_init); EXPORT_SYMBOL(spa_scan_get_stats); -/* device maniion */ +/* device manipulation */ EXPORT_SYMBOL(spa_vdev_add); EXPORT_SYMBOL(spa_vdev_attach); EXPORT_SYMBOL(spa_vdev_detach); diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c index d6f68ceda58..44711acef5a 100644 --- a/module/zfs/spa_checkpoint.c +++ b/module/zfs/spa_checkpoint.c @@ -102,7 +102,7 @@ * Once the synctask is done and the discarding zthr is awake, we discard * the checkpointed data over multiple TXGs by having the zthr prefetching * entries from vdev_checkpoint_sm and then starting a synctask that places - * them as free blocks in to their respective ms_allocatable and ms_sm + * them as free blocks into their respective ms_allocatable and ms_sm * structures. 
* [see spa_checkpoint_discard_thread()] * diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index e42f8a0212f..fa5120eb61b 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -31,7 +31,7 @@ * and the current log. All errors seen are logged to the current log. When a * scrub completes, the current log becomes the last log, the last log is thrown * out, and the current log is reinitialized. This way, if an error is somehow - * corrected, a new scrub will show that that it no longer exists, and will be + * corrected, a new scrub will show that it no longer exists, and will be * deleted from the log when the scrub completes. * * The log is stored using a ZAP object whose key is a string form of the diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index b590a1d57bd..fa95d316073 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -63,7 +63,7 @@ * overwrite the original creation of the pool. 'sh_phys_max_off' is the * physical ending offset in bytes of the log. This tells you the length of * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record - * is added, 'sh_eof' is incremented by the the size of the record. + * is added, 'sh_eof' is incremented by the size of the record. * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). * This is where the consumer should start reading from after reading in * the 'zpool create' portion of the log. diff --git a/module/zfs/txg.c b/module/zfs/txg.c index d1fb50188e4..418315be86d 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -644,8 +644,8 @@ txg_quiesce_thread(void *arg) /* * Delay this thread by delay nanoseconds if we are still in the open - * transaction group and there is already a waiting txg quiesing or quiesced. - * Abort the delay if this txg stalls or enters the quiesing state. + * transaction group and there is already a waiting txg quiescing or quiesced. 
+ * Abort the delay if this txg stalls or enters the quiescing state. */ void txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) @@ -768,7 +768,7 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce) /* * If there isn't a txg syncing or in the pipeline, push another txg through - * the pipeline by queiscing the open txg. + * the pipeline by quiescing the open txg. */ void txg_kick(dsl_pool_t *dp) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 81ef87e254a..952b565819e 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -223,7 +223,7 @@ vdev_default_xlate(vdev_t *vd, const range_seg_t *in, range_seg_t *res) } /* - * Derive the enumerated alloction bias from string input. + * Derive the enumerated allocation bias from string input. * String origin is either the per-vdev zap or zpool(1M). */ static vdev_alloc_bias_t @@ -1320,7 +1320,7 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) #ifndef _KERNEL /* - * To accomodate zdb_leak_init() fake indirect + * To accommodate zdb_leak_init() fake indirect * metaslabs, we allocate a metaslab group for * indirect vdevs which normally don't have one. */ @@ -4177,7 +4177,7 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion * factor. We must calculate this here and not at the root vdev * because the root vdev's psize-to-asize is simply the max of its - * childrens', thus not accurate enough for us. + * children's, thus not accurate enough for us. */ dspace_delta = vdev_deflated_space(vd, space_delta); diff --git a/module/zfs/vdev_cache.c b/module/zfs/vdev_cache.c index 0f1d9448b59..b63b9f9795f 100644 --- a/module/zfs/vdev_cache.c +++ b/module/zfs/vdev_cache.c @@ -46,7 +46,7 @@ * terribly wasteful of bandwidth. A more intelligent version of the cache * could keep track of access patterns and not do read-ahead unless it sees * at least two temporally close I/Os to the same region. 
Currently, only - * metadata I/O is inflated. A futher enhancement could take advantage of + * metadata I/O is inflated. A further enhancement could take advantage of * more semantic information about the I/O. And it could use something * faster than an AVL tree; that was chosen solely for convenience. * diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c index b1590132636..803d97c297c 100644 --- a/module/zfs/vdev_initialize.c +++ b/module/zfs/vdev_initialize.c @@ -599,7 +599,7 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list) } /* - * Stop initializing a device, with the resultant initialing state being + * Stop initializing a device, with the resultant initializing state being * tgt_state. For blocking behavior pass NULL for vd_list. Otherwise, when * a list_t is provided the stopping vdev is inserted in to the list. Callers * are then required to call vdev_initialize_stop_wait() to block for all the diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 23ff75bfc96..2f75fca827f 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -485,7 +485,7 @@ vdev_mirror_preferred_child_randomize(zio_t *zio) /* * Try to find a vdev whose DTL doesn't contain the block we want to read - * prefering vdevs based on determined load. + * preferring vdevs based on determined load. * * Try to find a child whose DTL doesn't contain the block we want to read. * If we can't, try the read on any vdev we haven't already tried. diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 86b20f13483..d3d9a6baa4a 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -893,7 +893,7 @@ vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority) * ZIO_PRIORITY_NOW is used by the vdev cache code and the aggregate zio * code to issue IOs without adding them to the vdev queue. 
In this * case, the zio is already going to be issued as quickly as possible - * and so it doesn't need any reprioitization to help. + * and so it doesn't need any reprioritization to help. */ if (zio->io_priority == ZIO_PRIORITY_NOW) return; diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 327b186713f..f63ccaa94cb 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -98,7 +98,7 @@ * R = 4^n-1 * D_0 + 4^n-2 * D_1 + ... + 4^1 * D_n-2 + 4^0 * D_n-1 * = ((...((D_0) * 4 + D_1) * 4 + ...) * 4 + D_n-2) * 4 + D_n-1 * - * We chose 1, 2, and 4 as our generators because 1 corresponds to the trival + * We chose 1, 2, and 4 as our generators because 1 corresponds to the trivial * XOR operation, and 2 and 4 can be computed quickly and generate linearly- * independent coefficients. (There are no additional coefficients that have * this property which is why the uncorrected Plank method breaks down.) @@ -447,7 +447,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, /* * If all data stored spans all columns, there's a danger that parity * will always be on the same device and, since parity isn't read - * during normal operation, that that device's I/O bandwidth won't be + * during normal operation, that device's I/O bandwidth won't be * used effectively. We therefore switch the parity every 1MB. * * ... at least that was, ostensibly, the theory. As a practical @@ -2336,7 +2336,7 @@ vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded) /* * Determine if any portion of the provided block resides on a child vdev * with a dirty DTL and therefore needs to be resilvered. The function - * assumes that at least one DTL is dirty which imples that full stripe + * assumes that at least one DTL is dirty which implies that full stripe * width blocks must be resilvered. 
*/ static boolean_t diff --git a/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/module/zfs/vdev_raidz_math_aarch64_neon_common.h index 024917417a5..0ea2ad611c7 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon_common.h +++ b/module/zfs/vdev_raidz_math_aarch64_neon_common.h @@ -42,7 +42,7 @@ /* * Here we need registers not used otherwise. * They will be used in unused ASM for the case - * with more registers than required... but GGC + * with more registers than required... but GCC * will still need to make sure the constraints * are correct, and duplicate constraints are illegal * ... and we use the "register" number as a name diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 1aeea131449..44e4d230a30 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -66,7 +66,7 @@ * consuming excessive system or running forever. If one of these limits is * hit, the channel program will be stopped immediately and return from * zcp_eval() with an error code. No attempt will be made to roll back or undo - * any changes made by the channel program before the error occured. + * any changes made by the channel program before the error occurred. * Consumers invoking zcp_eval() from elsewhere in the kernel may pass a time * limit of 0, disabling the time limit. * @@ -77,7 +77,7 @@ * In place of a return value, an error message will also be returned in the * 'result' nvlist containing information about the error. No attempt will be * made to roll back or undo any changes made by the channel program before the - * error occured. + * error occurred. * * 3. If an error occurs inside a ZFS library call which returns an error code, * the error is returned to the Lua script to be handled as desired. @@ -160,7 +160,7 @@ zcp_argerror(lua_State *state, int narg, const char *msg, ...) * of a function call. * * If an error occurs, the cleanup function will be invoked exactly once and - * then unreigstered. + * then unregistered. 
* * Returns the registered cleanup handler so the caller can deregister it * if no error occurs. diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c index 0a5f0b8242a..42c125d48cd 100644 --- a/module/zfs/zcp_get.c +++ b/module/zfs/zcp_get.c @@ -547,7 +547,7 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop) error = dsl_prop_get_ds(ds, prop_name, sizeof (numval), 1, &numval, setpoint); - /* Fill in temorary value for prop, if applicable */ + /* Fill in temporary value for prop, if applicable */ (void) get_temporary_prop(ds, zfs_prop, &numval, setpoint); /* Push value to lua stack */ @@ -678,7 +678,7 @@ parse_userquota_prop(const char *prop_name, zfs_userquota_prop_t *type, if (strncmp(cp, "S-1-", 4) == 0) { /* * It's a numeric SID (eg "S-1-234-567-89") and we want to - * seperate the domain id and the rid + * separate the domain id and the rid */ int domain_len = strrchr(cp, '-') - cp; domain_val = kmem_alloc(domain_len + 1, KM_SLEEP); diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c index f2644552071..d6e0b542175 100644 --- a/module/zfs/zcp_iter.c +++ b/module/zfs/zcp_iter.c @@ -435,7 +435,7 @@ static zcp_list_info_t zcp_system_props_list_info = { }; /* - * Get a list of all visble properties and their values for a given dataset. + * Get a list of all visible properties and their values for a given dataset. * Returned on the stack as a Lua table. */ static int diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c index b1af4da2f4a..26af91e27d4 100644 --- a/module/zfs/zfs_acl.c +++ b/module/zfs/zfs_acl.c @@ -810,7 +810,7 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) * for zfs_copy_ace_2_fuid(). * * We only convert an ACL once, so this won't happen - * everytime. + * every time. 
*/ oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, KM_SLEEP); diff --git a/module/zfs/zfs_byteswap.c b/module/zfs/zfs_byteswap.c index 7893bde4e2d..1b8bb82c3fb 100644 --- a/module/zfs/zfs_byteswap.c +++ b/module/zfs/zfs_byteswap.c @@ -44,7 +44,7 @@ zfs_oldace_byteswap(ace_t *ace, int ace_cnt) } /* - * swap ace_t and ace_oject_t + * swap ace_t and ace_object_t */ void zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) @@ -70,7 +70,7 @@ zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) * larger than needed to hold the aces * present. As long as we do not do any * swapping beyond the end of our block we are - * okay. It it safe to swap any non-ace data + * okay. It is safe to swap any non-ace data * within the block since it is just zeros. */ if (ptr + sizeof (zfs_ace_hdr_t) > end) { diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index b3cbc7d7e5f..1e61ef06d00 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -596,7 +596,7 @@ zfsctl_root(znode_t *zp) /* * Generate a long fid to indicate a snapdir. We encode whether snapdir is - * already monunted in gen field. We do this because nfsd lookup will not + * already mounted in gen field. We do this because nfsd lookup will not * trigger automount. Next time the nfsd does fh_to_dentry, we will notice * this and do automount and return ESTALE to force nfsd revalidate and follow * mount. diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 63ac97754d3..6bdad737cd8 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -55,7 +55,7 @@ #include /* - * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups + * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups * of names after deciding which is the appropriate lookup interface. */ static int @@ -232,7 +232,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, /* * Wait until there are no locks on this name. 
* - * Don't grab the the lock if it is already held. However, cannot + * Don't grab the lock if it is already held. However, cannot * have both ZSHARED and ZHAVELOCK together. */ ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index ac573ccbf17..f2e808d6fb2 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2111,7 +2111,7 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... - * XXX reading with out owning + * XXX reading without owning */ if (!zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZVOL) { @@ -6877,7 +6877,7 @@ zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec) continue; if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) { - /* at least one non-optionial key is expected here */ + /* at least one non-optional key is expected here */ if (!required_keys_found) return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED)); continue; diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 6348cac7dcc..0e14cadac5e 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1476,7 +1476,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) * "preferred" size. */ - /* Round up so we never have a filesytem using 0 blocks. */ + /* Round up so we never have a filesystem using 0 blocks. */ refdbytes = P2ROUNDUP(refdbytes, statp->f_bsize); statp->f_blocks = (refdbytes + availbytes) >> bshift; statp->f_bfree = availbytes >> bshift; @@ -2396,7 +2396,7 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) } /* - * Return true if the coresponding vfs's unmounted flag is set. + * Return true if the corresponding vfs's unmounted flag is set. * Otherwise return false. * If this function returns true we know VFS unmount has been initiated. 
*/ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 1ad6f1588cc..de7b59935e8 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -889,7 +889,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * Clear Set-UID/Set-GID bits on successful write if not * privileged and at least one of the execute bits is set. * - * It would be nice to to this after all writes have + * It would be nice to do this after all writes have * been done, but that would still expose the ISUID/ISGID * to another app after the partial write is committed. * @@ -4378,7 +4378,7 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, uint64_t txtype = TX_LINK; /* * tmpfile is created to be in z_unlinkedobj, so remove it. - * Also, we don't log in ZIL, be cause all previous file + * Also, we don't log in ZIL, because all previous file * operation on the tmpfile are ignored by ZIL. Instead we * always wait for txg to sync to make sure all previous * operation are sync safe. @@ -4638,7 +4638,7 @@ zfs_dirty_inode(struct inode *ip, int flags) #ifdef I_DIRTY_TIME /* - * This is the lazytime semantic indroduced in Linux 4.0 + * This is the lazytime semantic introduced in Linux 4.0 * This flag will only be called from update_time when lazytime is set. * (Note, I_DIRTY_SYNC will also set if not lazytime) * Fortunately mtime and ctime are managed within ZFS itself, so we diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 498547758b1..234e134904a 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -787,7 +787,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } /* - * No execs denied will be deterimed when zfs_mode_compute() is called. + * No execs denied will be determined when zfs_mode_compute() is called. 
*/ pflags |= acl_ids->z_aclp->z_hints & (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| @@ -1270,7 +1270,7 @@ zfs_rezget(znode_t *zp) * If the file has zero links, then it has been unlinked on the send * side and it must be in the received unlinked set. * We call zfs_znode_dmu_fini() now to prevent any accesses to the - * stale data and to prevent automatical removal of the file in + * stale data and to prevent automatic removal of the file in * zfs_zinactive(). The file will be removed either when it is removed * on the send side and the next incremental stream is received or * when the unlinked set gets processed. diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 98678aa4465..8411e333b18 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -58,7 +58,7 @@ * * In the event of a crash or power loss, the itxs contained by each * dataset's on-disk ZIL will be replayed when that dataset is first - * instantiated (e.g. if the dataset is a normal fileystem, when it is + * instantiated (e.g. if the dataset is a normal filesystem, when it is * first mounted). * * As hinted at above, there is one ZIL per dataset (both the in-memory @@ -2002,7 +2002,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) /* * If there are any in-memory intent log transactions which have now been * synced then start up a taskq to free them. We should only do this after we - * have written out the uberblocks (i.e. txg has been comitted) so that + * have written out the uberblocks (i.e. txg has been committed) so that * don't inadvertently clean out in-memory log records that would be required * by zil_commit(). 
*/ diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c index 7b148375d0c..179fab5de36 100644 --- a/module/zfs/zio_checksum.c +++ b/module/zfs/zio_checksum.c @@ -308,7 +308,7 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) mutex_exit(&spa->spa_cksum_tmpls_lock); } -/* convenience function to update a checksum to accomodate an encryption MAC */ +/* convenience function to update a checksum to accommodate an encryption MAC */ static void zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor) { diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index f5cbc3e8218..cdaade27c67 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -155,7 +155,7 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, abd_return_buf(src, tmp, s_len); /* - * Decompression shouldn't fail, because we've already verifyied + * Decompression shouldn't fail, because we've already verified * the checksum. However, for extra protection (e.g. against bitflips * in non-ECC RAM), we handle this error (and test it). */ diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index eb781b64fa1..7cf20f4136b 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -369,7 +369,7 @@ zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) /* * This function handles all encryption and decryption in zfs. When * encrypting it expects puio to reference the plaintext and cuio to - * reference the cphertext. cuio must have enough space for the + * reference the ciphertext. cuio must have enough space for the * ciphertext + room for a MAC. datalen should be the length of the * plaintext / ciphertext alone. */ @@ -934,7 +934,7 @@ zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) /* * At L0 we want to verify these fields to ensure that data blocks - * can not be reinterpretted. For instance, we do not want an attacker + * can not be reinterpreted. 
For instance, we do not want an attacker * to trick us into returning raw lz4 compressed data to the user * by modifying the compression bits. At higher levels, we cannot * enforce this policy since raw sends do not convey any information diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 78896d3dc38..d8af503bdfc 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -113,7 +113,7 @@ freq_triggered(uint32_t frequency) return (B_TRUE); /* - * Note: we still handle legacy (unscaled) frequecy values + * Note: we still handle legacy (unscaled) frequency values */ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c index 216c7940152..810ab28988a 100644 --- a/module/zfs/zpl_super.c +++ b/module/zfs/zpl_super.c @@ -297,7 +297,7 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) * The dsl pool lock must be released prior to calling sget(). * It is possible sget() may block on the lock in grab_super() * while deactivate_super() holds that same lock and waits for - * a txg sync. If the dsl_pool lock is held over over sget() + * a txg sync. If the dsl_pool lock is held over sget() * this can prevent the pool sync and cause a deadlock. 
*/ dsl_pool_rele(dmu_objset_pool(os), FTAG); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index f74eb28aec8..840b8d008ec 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1997,7 +1997,7 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg) /* at this point, the dsname should name a snapshot */ if (strchr(dsname, '@') == 0) { dprintf("zvol_create_snap_minor_cb(): " - "%s is not a shapshot name\n", dsname); + "%s is not a snapshot name\n", dsname); } else { minors_job_t *job; char *n = strdup(dsname); From 500977eed2be539c7f0d43d157526102e2809f16 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 02:58:26 +0200 Subject: [PATCH 203/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9242 --- tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh | 2 +- .../zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh | 2 +- .../zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh | 2 +- tests/zfs-tests/tests/functional/trim/trim.kshlib | 2 +- .../tests/functional/userquota/userquota_005_neg.ksh | 2 +- .../tests/functional/userquota/userquota_010_pos.ksh | 2 +- .../tests/functional/userquota/userquota_012_neg.ksh | 2 +- tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh | 4 ++-- tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh | 2 +- .../tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh index fa610511657..a53aeabffcd 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh @@ -26,7 +26,7 @@ # 3. Concurrently do the following: # 3.1. Perform 8K sync writes # 3.2. Perform log offline/online commands -# 4. Loop to test with growing "zfs_commit_timout_pct" values. +# 4. 
Loop to test with growing "zfs_commit_timeout_pct" values. # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh index b404ffbd50e..124a7db9c6e 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh @@ -36,7 +36,7 @@ # DESCRIPTION: # An archive of a zfs file system and an archive of its snapshot # is identical even though the original file system has -# changed sinced the snapshot was taken. +# changed since the snapshot was taken. # # STRATEGY: # 1) Create files in all of the zfs file systems diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh index dc50e46933a..68a616c02a6 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh @@ -35,7 +35,7 @@ # # DESCRIPTION: # An archive of a zfs dataset and an archive of its snapshot -# changed sinced the snapshot was taken. +# changed since the snapshot was taken. # # STRATEGY: # 1) Create some files in a ZFS dataset diff --git a/tests/zfs-tests/tests/functional/trim/trim.kshlib b/tests/zfs-tests/tests/functional/trim/trim.kshlib index 02802d8c91b..ed6a8f91b97 100644 --- a/tests/zfs-tests/tests/functional/trim/trim.kshlib +++ b/tests/zfs-tests/tests/functional/trim/trim.kshlib @@ -18,7 +18,7 @@ . $STF_SUITE/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib # -# Get the actual on disk disk for the provided file. +# Get the actual size on disk for the provided file. 
# function get_size_mb { diff --git a/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh b/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh index 825ebe09b28..5684b05b7e4 100755 --- a/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh +++ b/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh @@ -64,7 +64,7 @@ for user in "${no_users[@]}"; do log_mustnot zfs set userquota@$user=100m $QFS done -log_note "can set all numberic id even that id is not existed" +log_note "can set all numeric id even if that id does not exist" log_must zfs set userquota@12345678=100m $QFS log_mustnot zfs set userquota@12345678=100m $snap_fs diff --git a/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh b/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh index 08af6560dc8..20c9c56ba5e 100755 --- a/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh @@ -33,7 +33,7 @@ # # DESCRIPTION: -# Check userquota and groupquota be overwrited at same time +# Check userquota and groupquota being exceeded at the same time # # # STRATEGY: diff --git a/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh b/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh index 088499eb042..b553f91d40d 100755 --- a/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh +++ b/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh @@ -56,7 +56,7 @@ log_onexit cleanup typeset snap_fs=$QFS@snap log_assert "Check set userquota and groupquota on snapshot" -log_note "Check can not set user|group quuota on snapshot" +log_note "Check can not set user|group quota on snapshot" log_must zfs snapshot $snap_fs log_mustnot zfs set userquota@$QUSER1=$UQUOTA_SIZE $snap_fs diff --git a/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh b/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh index 
a56fce4eaba..0a661e935b7 100755 --- a/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh +++ b/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh @@ -37,8 +37,8 @@ # should fail. # # STRATEGY: -# 1. Create a file, and set an with an xattr -# 2. Set the octal file permissions to 000 on the file. +# 1. Create a file with an xattr +# 2. Set the file permissions to 000 # 3. Check that we're unable to read the xattr as a non-root user # 4. Check that we're unable to write an xattr as a non-root user # diff --git a/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh b/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh index 80704fad75e..246f077af0a 100755 --- a/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh +++ b/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh @@ -135,7 +135,7 @@ else fi log_note "Checking mv" -# mv doesn't have any flags to preserve/ommit xattrs - they're +# mv doesn't have any flags to preserve/omit xattrs - they're # always moved. log_must touch $TESTDIR/mvfile.$$ create_xattr $TESTDIR/mvfile.$$ passwd /etc/passwd diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh index 256ca53241b..9ccf3f9ded5 100755 --- a/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh @@ -40,7 +40,7 @@ # # STRATEGY: # 1. Modify /etc/vfstab to add the test zvol as swap device. -# 2. Use /sbin/swapadd to add zvol as swap device throuth /etc/vfstab +# 2. Use /sbin/swapadd to add zvol as swap device through /etc/vfstab # 3. 
Create a file under /tmp and verify the file # From d632608210af9d9d2d7b3d3f68314ca67e818a03 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:07:35 +0200 Subject: [PATCH 204/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9243 --- tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/send-cD.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh | 2 +- .../tests/functional/rsend/send-c_stream_size_estimate.ksh | 2 +- .../tests/functional/rsend/send_realloc_encrypted_files.ksh | 2 +- tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh index 5e657a898f4..53147876045 100755 --- a/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh @@ -38,7 +38,7 @@ # STRATEGY: # 1. Separately promote pool clone, filesystem clone and volume clone. # 2. Recursively backup all the POOL and restore in POOL2 -# 3. Verify all the datesets and property be properly received. +# 3. Verify all the datasets and properties were properly received. 
# verify_runnable "both" diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh index 9ecd18d87da..68f0e13927d 100755 --- a/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh @@ -63,7 +63,7 @@ for prop in $(fs_inherit_prop); do done # -# Inherit propertes in sub-datasets +# Inherit properties in sub-datasets # for ds in "$POOL/$FS/fs1" "$POOL/$FS/fs1/fs2" "$POOL/$FS/fs1/fclone" ; do for prop in $(fs_inherit_prop) ; do diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh index 57d58b9bab7..d85970a7421 100755 --- a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh @@ -39,7 +39,7 @@ # 1. Setting properties for all the filesystem and volumes randomly # 2. Backup all the data from POOL by send -R # 3. Restore all the data in POOL2 -# 4. Verify all the perperties in two pools are same +# 4. Verify all the properties in the two pools are the same # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh index 60be67328e1..cb68b1c3b27 100755 --- a/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh @@ -25,7 +25,7 @@ # # Strategy: # 1. Bookmark a ZFS snapshot -# 2. Destroy the ZFS sanpshot +# 2. Destroy the ZFS snapshot # 3. Destroy the filesystem for the receive # 4. Verify receive of the full send stream # 5. 
Start an incremental ZFS send of the ZFS bookmark, redirect output to a diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh index 20f0bee1557..2d9fb01af10 100755 --- a/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh @@ -25,7 +25,7 @@ # # Strategy: # 1. Destroy the filesystem for the receive -# 2. Unmount the source filsesystem +# 2. Unmount the source filesystem # 3. Start a full ZFS send, redirect output to a file # 4. Mess up the contents of the stream state file on disk # 5. Try ZFS receive, which should fail with a checksum mismatch error diff --git a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh index ceface9dbc0..d0754a4f1aa 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh @@ -45,7 +45,7 @@ typeset inc=$BACKDIR/stream.inc log_must zfs create -o compress=lz4 $sendfs log_must zfs create -o compress=lz4 $recvfs typeset dir=$(get_prop mountpoint $sendfs) -# Don't use write_compressible: we want compressible but undedupable data here. +# Don't use write_compressible: we want compressible but undeduplicable data. log_must eval "dd if=/dev/urandom bs=1024k count=4 | base64 >$dir/file" log_must zfs snapshot $sendfs@snap0 log_must eval "zfs send -D -c $sendfs@snap0 >$stream0" diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh index d8d7c40e493..05ba5ed244d 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh @@ -28,7 +28,7 @@ # 2. Mess up the contents of the stream state file on disk # 3. Try ZFS receive, which should fail with a checksum mismatch error # 4. ZFS send to the stream state file again using the receive_resume_token -# 5. 
ZFS receieve and verify the receive completes successfully +# 5. ZFS receive and verify the receive completes successfully # 6. Repeat steps on an incremental ZFS send # diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh index 130bc3dbc9c..f1106819288 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh @@ -89,4 +89,4 @@ for compress in $compress_types; do "$vol_csize and $vol_refer differed by too much" done -log_pass "The the stream size given by -P accounts for compressed send." +log_pass "The stream size given by -P accounts for compressed send." diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh index 3c3de86d91c..83a79784d22 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh @@ -25,7 +25,7 @@ # Strategy: # 1. Create a pool containing an encrypted filesystem. # 2. Use 'zfs send -wp' to perform a raw send of the initial filesystem. -# 3. Repeat the followings steps N times to verify raw incremental receives. +# 3. Repeat the following steps N times to verify raw incremental receives. # a) Randomly change several key dataset properties. # b) Modify the contents of the filesystem such that dnode reallocation # is likely during the 'zfs receive', and receive_object() exercises diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh index 4b89a73d808..27d65439b25 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh @@ -25,7 +25,7 @@ # Strategy: # 1. 
Create a pool containing an encrypted filesystem. # 2. Use 'zfs send -wp' to perform a raw send of the initial filesystem. -# 3. Repeat the followings steps N times to verify raw incremental receives. +# 3. Repeat the following steps N times to verify raw incremental receives. # a) Randomly change several key dataset properties. # b) Modify the contents of the filesystem such that dnode reallocation # is likely during the 'zfs receive', and receive_object() exercises From f6a70187d2b2a44cc8930c15a6194e3af9d67f1c Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:08:56 +0200 Subject: [PATCH 205/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9244 --- .../tests/functional/pool_checkpoint/checkpoint_removal.ksh | 2 +- .../tests/functional/pool_checkpoint/pool_checkpoint.kshlib | 2 +- .../tests/functional/projectquota/projectid_001_pos.ksh | 4 ++-- .../tests/functional/projectquota/projectid_002_pos.ksh | 2 +- .../tests/functional/projectquota/projectquota_004_neg.ksh | 2 +- .../tests/functional/projectquota/projectspace_004_pos.ksh | 2 +- .../tests/functional/projectquota/projecttree_002_pos.ksh | 2 +- .../tests/functional/projectquota/projecttree_003_neg.ksh | 4 ++-- tests/zfs-tests/tests/functional/pyzfs/Makefile.am | 2 +- tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh b/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh index ad96d5dcb63..514a0598416 100755 --- a/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh +++ b/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh @@ -52,7 +52,7 @@ populate_test_pool # # Create big empty file and do some writes at random # offsets to ensure that it takes up space. 
Note that -# the implcitly created filesystem ($FS0) does not +# the implicitly created filesystem ($FS0) does not # have compression enabled. # log_must mkfile $BIGFILESIZE $FS0FILE diff --git a/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib b/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib index 6e410e0c85f..ea6c03e9d59 100644 --- a/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib +++ b/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib @@ -27,7 +27,7 @@ # This is why these tests run directly on pools that use a # "real disk vdev" (meaning not a file based one). These tests # use the $TESTPOOL pool that is created on top of $TESTDISK. -# This pool is refered to as the "test pool" and thus all +# This pool is referred to as the "test pool" and thus all # the tests of this group use the testpool-related functions of # this file (not the nested_pools ones). # diff --git a/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh index 44af9941b92..46e79062a0e 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh @@ -38,8 +38,8 @@ # # # STRATEGY: -# 1. Create a regular file and a directroy. -# 2. Set project ID on both directroy and regular file. +# 1. Create a regular file and a directory. +# 2. Set project ID on both directory and regular file. # 3. New created subdir or regular file should inherit its parent's # project ID if its parent has project inherit flag. # 4. New created subdir should inherit its parent project's inherit flag. 
diff --git a/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh index 1a402e298b9..e382f464046 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh @@ -41,7 +41,7 @@ # 1. Create three directories # 2. Set tdir1 and tdir3 project ID as PRJID1, # set tdir2 project ID as PRJID2. -# 3. Create regular file under tdir1. It inherits tdir1 proejct ID. +# 3. Create regular file under tdir1. It inherits tdir1 project ID. # 4. Hardlink from tdir1's child to tdir2 should be denied, # move tdir1's child to tdir2 will be object recreated. # 5. Hardlink from tdir1's child to tdir3 should succeed. diff --git a/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh b/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh index df0eda7d770..a975d2a19f0 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh @@ -62,7 +62,7 @@ for prj in "${no_prjs[@]}"; do log_mustnot zfs set projectquota@$prj=100m $QFS done -log_note "can set all numberic id even that id is not existed" +log_note "can set all numeric id even if that id does not exist" log_must zfs set projectquota@12345678=100m $QFS set -A sizes "100mfsd" "m0.12m" "GGM" "-1234-m" "123m-m" diff --git a/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh index 494d7f3b7ac..ec299e0e7f9 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh @@ -38,7 +38,7 @@ # # STRATEGY: # 1. set project [obj]quota on the directory -# 2. set project ID and inherit flag on the directoty +# 2. 
set project ID and inherit flag on the directory # 3. run 'df [-i]' on the directory and check the result # diff --git a/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh index 4008811a19e..d6101924270 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh @@ -39,7 +39,7 @@ # # STRATEGY: # 1. Create a tree with 4 level directories. -# 2. Set project ID on both directroy and regular file via +# 2. Set project ID on both directory and regular file via # "zfs project -p". # 3. Check the project ID via "zfs project". # 4. Set project inherit flag on kinds of level directories (and its diff --git a/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh b/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh index 33382fdbe92..cbc45857f77 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh @@ -43,8 +43,8 @@ # 2. "-C" only supports "-r" and "-k". # 3. "-s" only supports "-r" and "-p". # 4. "-c", "-C" and "-s" can NOT be specified together. -# 5. "-d" can overwirte former "-r". -# 6. "-r" can overwirte former "-d". +# 5. "-d" can overwrite former "-r". +# 6. "-r" can overwrite former "-d". # 7. "-0" must be together with "-c". # 8. "-d" must be on directory. # 9. "-r" must be on directory. diff --git a/tests/zfs-tests/tests/functional/pyzfs/Makefile.am b/tests/zfs-tests/tests/functional/pyzfs/Makefile.am index 0a27adeccaf..4d99285e49c 100644 --- a/tests/zfs-tests/tests/functional/pyzfs/Makefile.am +++ b/tests/zfs-tests/tests/functional/pyzfs/Makefile.am @@ -7,7 +7,7 @@ EXTRA_DIST = \ # # The pyzfs module is built either for Python 2 or Python 3. In order -# to properly test it the unit tests must be updated to the matching vesion. 
+# to properly test it the unit tests must be updated to the matching version. # $(pkgpyzfs_SCRIPTS):%:%.in -$(SED) -e 's,@PYTHON\@,$(PYTHON),g' \ diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh index 4c105b9411c..0f88a1a5146 100755 --- a/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh @@ -35,4 +35,4 @@ log_mustnot raidz_test -T -log_pass "raidz_test detects errors as espected." +log_pass "raidz_test detects errors as expected." From 7a7da11671c949cc39c68658b3c637b4d553b2dc Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:10:31 +0200 Subject: [PATCH 206/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9246 --- .../zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh | 2 +- .../zfs-tests/tests/functional/fault/auto_online_001_pos.ksh | 2 +- .../zfs-tests/tests/functional/fault/auto_spare_multiple.ksh | 2 +- tests/zfs-tests/tests/functional/history/history_005_neg.ksh | 4 ++-- tests/zfs-tests/tests/functional/history/history_006_neg.ksh | 2 +- tests/zfs-tests/tests/functional/history/history_007_pos.ksh | 2 +- .../zfs-tests/tests/functional/history/history_common.kshlib | 2 +- tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh | 2 +- tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh | 4 ++-- tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh | 2 +- .../tests/functional/large_files/large_files_001_pos.ksh | 2 +- tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh | 2 +- tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh | 4 ++-- tests/zfs-tests/tests/functional/no_space/enospc_df.ksh | 2 +- tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git 
a/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh b/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh index c2c91102041..45fdb5b8569 100755 --- a/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh @@ -36,7 +36,7 @@ # zfs allow can deal with invalid arguments.(Invalid options or combination) # # STRATEGY: -# 1. Verify invalid argumets will cause error. +# 1. Verify invalid arguments will cause error. # 2. Verify non-optional argument was missing will cause error. # 3. Verify invalid options cause error. # diff --git a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh index bc925bc91c8..03fc15a8a7c 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh @@ -129,7 +129,7 @@ do typeset -i timeout=0 while true; do if ((timeout == $MAXTIMEOUT)); then - log_fail "Timeout occured" + log_fail "Timeout occurred" fi ((timeout++)) diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh index 8650ceff7d1..25c23aecc30 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh @@ -116,7 +116,7 @@ for type in "mirror" "raidz" "raidz2" "raidz3"; do done # Rinse and repeat, this time faulting both devices at the same time -# NOTE: "raidz" is exluded since it cannot survive 2 faulted devices +# NOTE: "raidz" is excluded since it cannot survive 2 faulted devices # NOTE: "mirror" is a 4-way mirror here and should survive this test for type in "mirror" "raidz2" "raidz3"; do # 1. 
Create a pool with two hot spares diff --git a/tests/zfs-tests/tests/functional/history/history_005_neg.ksh b/tests/zfs-tests/tests/functional/history/history_005_neg.ksh index f6a81a4ac5f..297a701cc56 100755 --- a/tests/zfs-tests/tests/functional/history/history_005_neg.ksh +++ b/tests/zfs-tests/tests/functional/history/history_005_neg.ksh @@ -42,9 +42,9 @@ # zpool iostat # # STRATEGY: -# 1. Create a test pool. +# 1. Create a test pool # 2. Separately invoke zpool list|status|iostat -# 3. Verify they was not recored in pool history. +# 3. Verify they were not recorded in pool history # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/history/history_006_neg.ksh b/tests/zfs-tests/tests/functional/history/history_006_neg.ksh index a2da831c5cc..e97adc4e3ce 100755 --- a/tests/zfs-tests/tests/functional/history/history_006_neg.ksh +++ b/tests/zfs-tests/tests/functional/history/history_006_neg.ksh @@ -40,7 +40,7 @@ # STRATEGY: # 1. Create a test pool. # 2. Separately invoke zfs list|get|holds|mount|unmount|share|unshare|send -# 3. Verify they were not recored in pool history. +# 3. Verify they were not recorded in pool history. # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/history/history_007_pos.ksh b/tests/zfs-tests/tests/functional/history/history_007_pos.ksh index b65e855d8c7..d1c92c5e7c2 100755 --- a/tests/zfs-tests/tests/functional/history/history_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_007_pos.ksh @@ -83,7 +83,7 @@ for arch in "i386" "sparc"; do TZ=$TIMEZONE zpool history $migratedpoolname | grep -v "^$" \ >$migrated_cmds_f RET=$? - (( $RET != 0 )) && log_fail "zpool histroy $migratedpoolname fails." + (( $RET != 0 )) && log_fail "zpool history $migratedpoolname fails." 
# The migrated history file should differ with original history file on # two commands -- 'export' and 'import', which are included in migrated diff --git a/tests/zfs-tests/tests/functional/history/history_common.kshlib b/tests/zfs-tests/tests/functional/history/history_common.kshlib index 80af2e903da..d97e015fcfe 100644 --- a/tests/zfs-tests/tests/functional/history/history_common.kshlib +++ b/tests/zfs-tests/tests/functional/history/history_common.kshlib @@ -224,7 +224,7 @@ function verify_allow # # Here, we determine three things: - # - Whether we're operating on a set or an indivdual permission (which + # - Whether we're operating on a set or an individual permission (which # dictates the case of the first character in the code) # - The name of the dataset we're operating on. # - Whether the operation applies locally or to descendent datasets (or diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh index 63c68e66e4e..aecdc5a3b07 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh @@ -80,7 +80,7 @@ dumpdev=`dumpadm | grep "Dump device" | awk '{print $3}'` [[ -z "$dumpdev" ]] && log_untested "No dump device has been configured" [[ "$dumpdev" != "$diskslice" ]] && \ - log_untested "Dump device has not been been configured to $diskslice" + log_untested "Dump device has not been configured to $diskslice" log_note "Attempt to zpool the dump device" unset NOINUSE_CHECK diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh index 95d505f35bf..b126f66a0c3 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh @@ -48,8 +48,8 @@ verify_runnable "global" function cleanup { # - # Essentailly this is the default_cleanup routine but I cannot get it - # to work correctly. 
So its reproduced below. Still need to full + # Essentially this is the default_cleanup routine but I cannot get it + # to work correctly. So its reproduced below. Still need to fully # understand why default_cleanup does not work correctly from here. # log_must zfs umount $TESTPOOL/$TESTFS diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh index ddc8fa7a49c..1f5510ae5e6 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh @@ -69,7 +69,7 @@ function verify_assertion #slices echo "y" | newfs -v $t > /dev/null 2>&1 (( $? !=0 )) && \ log_fail "newfs over exported pool " \ - "failes unexpected." + "fails unexpectedly." done return 0 diff --git a/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh b/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh index 3be20356ea0..f59603724e7 100755 --- a/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh @@ -38,7 +38,7 @@ # STRATEGY: # 1. largest_file will write to a file and increase its size # to the maximum allowable. -# 2. The last byte of the file should be accessbile without error. +# 2. The last byte of the file should be accessible without error. # 3. Writing beyond the maximum file size generates an 'errno' of # EFBIG. # diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh index 24150b827f8..2f4257993d4 100755 --- a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh @@ -33,7 +33,7 @@ # # DESCRIPTION: -# Writing to a file and mmaping that file at the +# Writing to a file and mmapping that file at the # same time does not result in a deadlock. 
# # STRATEGY: diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh index bf1eb54a738..9c4552b0cfb 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh @@ -66,11 +66,11 @@ UBER_CHANGES=$(count_mmp_writes $TESTPOOL 10) log_note "Uberblock changed $UBER_CHANGES times" if [ $UBER_CHANGES -lt $MIN_UB_WRITES ]; then - log_fail "Fewer uberblock writes occured than expected ($EXPECTED)" + log_fail "Fewer uberblock writes occurred than expected ($EXPECTED)" fi if [ $UBER_CHANGES -gt $MAX_UB_WRITES ]; then - log_fail "More uberblock writes occured than expected ($EXPECTED)" + log_fail "More uberblock writes occurred than expected ($EXPECTED)" fi log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN diff --git a/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh b/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh index b3df69141fe..b1eeaf2cc56 100755 --- a/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh +++ b/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh @@ -58,7 +58,7 @@ log_must zfs umount $TESTPOOL/$TESTFS # Ensure the pool root filesystem shows in df output. # If the pool was full (available == 0) and the pool -# root filesytem had very little in it (used < 1 block), +# root filesystem had very little in it (used < 1 block), # the size reported to df was zero (issue #8253) and # df skipped the filesystem in its output. log_must eval "df -h | grep $TESTPOOL" diff --git a/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh b/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh index c9d7b59b344..bd38883d757 100755 --- a/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh +++ b/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh @@ -24,7 +24,7 @@ # # Strategy: # 1. Create an origin fs with compression and sha256. -# 2. 
Clone origin such that it inherits the properies. +# 2. Clone origin such that it inherits the properties. # 3. Use dd with the sync flag to test the sync write path. # From 36be89b8e57746f6dc9d35bd6bfe94f1bcbd42e2 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:12:01 +0200 Subject: [PATCH 207/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9247 --- .../tests/functional/cli_root/zpool_add/zpool_add.kshlib | 2 +- .../functional/cli_root/zpool_create/zpool_create.shlib | 2 +- .../cli_root/zpool_create/zpool_create_005_pos.ksh | 6 +++--- .../cli_root/zpool_create/zpool_create_016_pos.ksh | 2 +- .../cli_root/zpool_events/zpool_events_clear.ksh | 2 +- .../cli_root/zpool_history/zpool_history_001_neg.ksh | 2 +- .../zpool_import/import_rewind_config_changed.ksh | 2 +- .../zpool_import/zpool_import_missing_002_pos.ksh | 2 +- .../cli_root/zpool_labelclear/zpool_labelclear_active.ksh | 4 ++-- .../zpool_labelclear/zpool_labelclear_exported.ksh | 8 ++++---- .../cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh | 2 +- tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh | 2 +- .../functional/cli_user/misc/zpool_online_001_neg.ksh | 2 +- .../cli_user/zpool_iostat/zpool_iostat_005_pos.ksh | 2 +- .../cli_user/zpool_status/zpool_status_003_pos.ksh | 2 +- 15 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib index f80a2a864e4..94615ee3a0b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib @@ -90,7 +90,7 @@ function find_mnttab_dev } # -# Save the systme current dump device configuration +# Save the system current dump device configuration # function save_dump_dev { diff --git 
a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib index 9e687483206..3f3f4472990 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib @@ -146,7 +146,7 @@ function find_vfstab_dev } # -# Save the systme current dump device configuration +# Save the system current dump device configuration # function save_dump_dev { diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh index 2afbec37dca..de5e9d8e79c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh @@ -95,7 +95,7 @@ do log_must zpool create $opt $TESTPOOL ${pooltype[i]} \ $file.1 $file.2 $file.3 ! poolexists $TESTPOOL && \ - log_fail "Createing pool with $opt fails." + log_fail "Creating pool with $opt fails." mpt=`zfs mount | egrep "^$TESTPOOL[^/]" | awk '{print $2}'` (( ${#mpt} == 0 )) && \ log_fail "$TESTPOOL created with $opt is not mounted." @@ -105,12 +105,12 @@ do from the output of zfs mount" if [[ "$opt" == "-m $TESTDIR1" ]]; then [[ ! -d $TESTDIR1 ]] && \ - log_fail "$TESTDIR1 is not created auotmatically." + log_fail "$TESTDIR1 is not created automatically." [[ "$mpt" != "$TESTDIR1" ]] && \ log_fail "$TESTPOOL is not mounted on $TESTDIR1." elif [[ "$opt" == "-R $TESTDIR1" ]]; then [[ ! -d $TESTDIR1/$TESTPOOL ]] && \ - log_fail "$TESTDIR1/$TESTPOOL is not created auotmatically." + log_fail "$TESTDIR1/$TESTPOOL is not created automatically." [[ "$mpt" != "$TESTDIR1/$TESTPOOL" ]] && \ log_fail "$TESTPOOL is not mounted on $TESTDIR1/$TESTPOOL." 
else diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh index 3fca607b1f4..cbb5806d9af 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh @@ -41,7 +41,7 @@ # STRATEGY: # 1. delete all devices in the swap # 2. create a zpool -# 3. Verify the creation is successed. +# 3. Verify the creation was successful # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh index ab862354b81..67038a4743d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh @@ -34,7 +34,7 @@ log_assert "'zpool events -c' should successfully clear events." # 1. Clear all ZFS events # This is needed because we may already over the max number or events queued # (zfs_zevent_len_max) generated by previous tests: generating $EVENTS_NUM new -# events and then counting them is racy and leads to failues, so start from 0. +# events and then counting them is racy and leads to failures, so start from 0. log_must zpool events -c # 2. Generate some new ZFS events diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh index dd1be14a066..a2b73182bf4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh @@ -38,7 +38,7 @@ # # STRATEGY: # 1. Create pool, volume & snap -# 2. Verify 'zpool history' can cope with incorret arguments. +# 2. 
Verify 'zpool history' can cope with incorrect arguments. # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh index e8f3937609d..f42ba10d65c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh @@ -115,7 +115,7 @@ function test_common # further than the time that we took the checkpoint. # # Note that, ideally we would want to take a checkpoint - # right after we recond the txg we plan to rewind to. + # right after we record the txg we plan to rewind to. # But since we can't attach, detach or remove devices # while having a checkpoint, we take it after the # operation that changes the config. diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh index 7534ebca87f..c6d2637074f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh @@ -54,7 +54,7 @@ # 3. Export the test pool. # 4. Move one or more device files to other directory # 5. Verify 'zpool import -d' with the new directory -# will handle moved files successfullly. +# will handle moved files successfully. # Using the various combinations. 
# - Regular import # - Alternate Root Specified diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh index dcca2e9335d..b63d55d7ad6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh @@ -24,8 +24,8 @@ # STRATEGY: # 1. Create the pool with log device. # 2. Try clearing the label on data and log devices. -# 3. Add auxilary (cache/spare) vdevs. -# 4. Try clearing the label on auxilary vdevs. +# 3. Add auxiliary (cache/spare) vdevs. +# 4. Try clearing the label on auxiliary vdevs. # 5. Check that zpool labelclear will return non-zero and # labels are intact. diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh index a5131bdbb78..72a555bebe0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh @@ -26,8 +26,8 @@ # 2. Export the pool. # 3. Check that zpool labelclear returns non-zero when trying to # clear the label on ACTIVE vdevs, and succeeds with -f. -# 4. Add auxilary vdevs (cache/spare). -# 5. Check that zpool labelclear succeeds on auxilary vdevs of +# 4. Add auxiliary vdevs (cache/spare). +# 5. Check that zpool labelclear succeeds on auxiliary vdevs of # exported pool. 
verify_runnable "global" @@ -44,7 +44,7 @@ log_assert "zpool labelclear will fail on ACTIVE vdevs of exported pool and" \ for vdevtype in "" "cache" "spare"; do # Create simple pool, skip any mounts log_must zpool create -O mountpoint=none -f $TESTPOOL $disk1 log $disk2 - # Add auxilary vdevs (cache/spare) + # Add auxiliary vdevs (cache/spare) if [[ -n $vdevtype ]]; then log_must zpool add $TESTPOOL $vdevtype $disk3 fi @@ -63,7 +63,7 @@ for vdevtype in "" "cache" "spare"; do log_must zpool labelclear -f $disk2 log_mustnot zdb -lq $disk2 - # Check that labelclear on auxilary vdevs will succeed + # Check that labelclear on auxiliary vdevs will succeed if [[ -n $vdevtype ]]; then log_must zpool labelclear $disk3 log_mustnot zdb -lq $disk3 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh index adc1ba47fcc..696c8c66cc1 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh @@ -42,7 +42,7 @@ # # STRATEGY: # 1. Import pools of all versions -# 2. Setup a test enviorment over the old pools. +# 2. Setup a test environment over the old pools. # 3. Verify the commands related to 'zfs upgrade' succeed as expected. # diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh index bcf6a2296d5..fc0ebde1002 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh @@ -34,7 +34,7 @@ # This setup script is moderately complex, as it creates scenarios for all # of the tests included in this directory. 
Usually we'd want each test case -# to setup/teardown it's own configuration, but this would be time consuming +# to setup/teardown its own configuration, but this would be time consuming # given the nature of these tests. However, as a side-effect, one test # leaving the system in an unknown state could impact other test cases. diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh index b89cf07ac18..cd290515357 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh @@ -49,7 +49,7 @@ function check_for_online | grep ONLINE ) if [ -n "$RESULT" ] then - log_fail "A disk was brough online!" + log_fail "A disk was brought online!" fi } diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh index 1ae91c1a843..53652ec11b5 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh @@ -68,7 +68,7 @@ for i in $files ; do test_zpool_script "$i" "$testpool" "zpool iostat -Pv -c" done -# Test that we can run multiple scripts separated with a commma by running +# Test that we can run multiple scripts separated with a comma by running # all the scripts in a single -c line. 
allscripts="$(echo $scripts | sed -r 's/[[:blank:]]+/,/g')" test_zpool_script "$allscripts" "$testpool" "zpool iostat -Pv -c" diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh index c5e0c6e474a..fa7d3f3f2d5 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh @@ -68,7 +68,7 @@ for i in $files ; do test_zpool_script "$i" "$testpool" "zpool status -P -c" done -# Test that we can run multiple scripts separated with a commma by running +# Test that we can run multiple scripts separated with a comma by running # all the scripts in a single -c line. allscripts="$(echo $scripts | sed -r 's/[[:blank:]]+/,/g')" test_zpool_script "$allscripts" "$testpool" "zpool status -P -c" From 2af76a25abfc2488ea787db8a2b6c1cb8261430b Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:13:19 +0200 Subject: [PATCH 208/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9249 --- .../functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh | 2 +- .../cli_root/zfs_destroy/zfs_destroy_common.kshlib | 6 +++--- .../tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh | 2 +- .../functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh | 4 ++-- .../functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh | 4 ++-- .../functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh | 2 +- .../functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh | 2 +- .../functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh | 2 +- .../functional/cli_root/zfs_mount/zfs_mount_remount.ksh | 2 +- .../cli_root/zfs_property/zfs_written_property_001_pos.ksh | 6 +++--- .../functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh | 2 +- 
.../functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh | 2 +- .../cli_root/zfs_receive/zfs_receive_from_encrypted.ksh | 4 ++-- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh index 83cd0a27c30..1e129ddd3bc 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh @@ -157,7 +157,7 @@ verify_snapshots 1 snaps="1 2 3 4 5" setup_snapshots -log_note "Snapshot destory with hold" +log_note "Snapshot destroy with hold" range="1 2 3 4 5" for i in 1 2 3 4 5; do log_must zfs hold keep $TESTPOOL/$TESTFS1@snap$i diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib index 0a6f5ed9d1a..9a75daedbb3 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib @@ -56,7 +56,7 @@ function setup_testenv #[dtst] if ! datasetexists $FS; then log_must zfs create $FS fi - # Volume test is only availible on globle zone + # Volume test is only available on global zone if ! datasetexists $VOL && is_global_zone; then log_must zfs create -V $VOLSIZE $VOL block_device_wait @@ -127,7 +127,7 @@ function check_dataset shift for dtst in "$@"; do - # Volume and related stuff are unvailable in local zone + # Volume and related stuff are unavailable in local zone if ! is_global_zone; then if [[ $dtst == $VOL || $dtst == $VOLSNAP || \ $dtst == $VOLCLONE ]] @@ -140,7 +140,7 @@ function check_dataset if (( ${#newlist} != 0 )); then # Run each item in $newlist individually so on failure, the - # probelmatic dataset is listed in the logs. 
+ # problematic dataset is listed in the logs. for i in $newlist; do log_must $funname $i done diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh index b038e7484ab..4bd61137c7b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh @@ -114,7 +114,7 @@ availspace=$(get_prop available $TESTPOOL) typeset -i i=0 # make sure 'availspace' is larger then twice of FILESIZE to create a new pool. -# If any, we only totally create 3 pools for multple datasets testing to limit +# If any, we only totally create 3 pools for multiple datasets testing to limit # testing time while (( availspace > DFILESIZE )) && (( i < 3 )) ; do (( i += 1 )) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh index 584039f543c..3ef65b517c6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh @@ -36,8 +36,8 @@ # 'zfs inherit' should return an error with bad parameters in one command. # # STRATEGY: -# 1. Set an array of bad options and invlid properties to 'zfs inherit' -# 2. Execute 'zfs inherit' with bad options and passing invlid properties +# 1. Set an array of bad options and invalid properties to 'zfs inherit' +# 2. Execute 'zfs inherit' with bad options and passing invalid properties # 3. Verify an error is returned. 
# diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh index bc0d8c59c0c..3317b09e2b5 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh @@ -37,8 +37,8 @@ # 'zfs inherit' should return an error with bad parameters in one command. # # STRATEGY: -# 1. Set an array of bad options and invlid properties to 'zfs inherit' -# 2. Execute 'zfs inherit' with bad options and passing invlid properties +# 1. Set an array of bad options and invalid properties to 'zfs inherit' +# 2. Execute 'zfs inherit' with bad options and passing invalid properties # 3. Verify an error is returned. # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh index e2ef0bf00db..52ae1879d1a 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh @@ -45,7 +45,7 @@ # setuid setuid/nosetuid # # STRATEGY: -# 1. Create filesystem and get origianl property value. +# 1. Create filesystem and get original property value. # 2. Using 'zfs mount -o' to set filesystem property. # 3. Verify the property was set temporarily. # 4. Verify it will not affect the property that is stored on disk. 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh index 5f88b611002..84835a0d6d6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh @@ -83,7 +83,7 @@ log_must mkfile 1M $mntpnt/$TESTFILE2 log_mustnot ls $testfile log_must ls $mntpnt/$TESTFILE1 $mntpnt/$TESTFILE2 -# Verify $TESTFILE2 was created in $fs1, rather then $fs +# Verify $TESTFILE2 was created in $fs1, rather than $fs log_must zfs unmount $fs1 log_must zfs set mountpoint=$mntpnt1 $fs1 log_must zfs mount $fs1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh index 963ad626c2d..0b5d61f62f4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh @@ -72,4 +72,4 @@ else fi cd $curpath -log_pass "zfs mount fails with mounted filesystem or busy moutpoint as expected." +log_pass "zfs mount fails with mounted filesystem or busy mountpoint as expected." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh index f7a0978352b..66a4338655d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh @@ -29,7 +29,7 @@ # # DESCRIPTION: -# Verify remount functionality, expecially on readonly objects. +# Verify remount functionality, especially on readonly objects. # # STRATEGY: # 1. 
Prepare a filesystem and a snapshot diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh index bf94274ddbf..9a2d3cb8025 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh @@ -17,7 +17,7 @@ # # DESCRIPTION # Verify that "zfs list" gives correct values for written and written@ -# proerties for the dataset when different operations are on done on it +# properties for the dataset when different operations are on done on it # # # STRATEGY @@ -86,7 +86,7 @@ blocks=0 for i in 1 2 3; do written=$(get_prop written $TESTPOOL/$TESTFS1@snap$i) if [[ $blocks -eq 0 ]]; then - # Written value for the frist non-clone snapshot is + # Written value for the first non-clone snapshot is # expected to be equal to the referenced value. 
expected_written=$( \ get_prop referenced $TESTPOOL/$TESTFS1@snap$i) @@ -120,7 +120,7 @@ sync_pool written=$(get_prop written $TESTPOOL/$TESTFS1) writtenat3=$(get_prop written@snap3 $TESTPOOL/$TESTFS1) [[ $written -eq $writtenat3 ]] || \ - log_fail "Written and written@ dont match $written $writtenat3" + log_fail "Written and written@ don't match $written $writtenat3" within_percent $written $before_written 0.1 && \ log_fail "Unexpected written value after delete $written $before_written" writtenat=$(get_prop written@snap1 $TESTPOOL/$TESTFS1) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh index 5ce0e02fa61..f8439dcbbeb 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh @@ -155,7 +155,7 @@ for orig_fs in $datasets ; do log_must zfs destroy -Rf $rst_fs - log_note "Verfiying 'zfs receive -d ' works." + log_note "Verifying 'zfs receive -d ' works." i=0 while (( i < ${#bkup[*]} )); do diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh index fcbdc5e1594..3a9c2279a61 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh @@ -36,7 +36,7 @@ # Verify 'zfs receive' fails with malformed parameters. # # STRATEGY: -# 1. Denfine malformed parameters array +# 1. Define malformed parameters array # 2. Feed the malformed parameters to 'zfs receive' # 3. 
Verify the command should be failed # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh index 5eee9eecf4b..de771ccf395 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh @@ -31,9 +31,9 @@ # 4. Snapshot the encrypted dataset # 5. Attempt to receive the snapshot into an unencrypted child # 6. Verify encryption is not enabled -# 7. Verify the cheksum of the file is the same as the original +# 7. Verify the checksum of the file is the same as the original # 8. Attempt to receive the snapshot into an encrypted child -# 9. Verify the cheksum of the file is the same as the original +# 9. Verify the checksum of the file is the same as the original # verify_runnable "both" From 18d335d830d7a87178c7e4a480e2779a56174c58 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:14:53 +0200 Subject: [PATCH 209/325] Fix typos in tests/ Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9250 --- tests/zfs-tests/cmd/mmapwrite/mmapwrite.c | 2 +- tests/zfs-tests/include/blkdev.shlib | 2 +- tests/zfs-tests/include/libtest.shlib | 6 +++--- .../zfs-tests/tests/functional/acl/acl_common.kshlib | 12 ++++++------ .../tests/functional/cachefile/cachefile_004_pos.ksh | 4 ++-- .../functional/casenorm/insensitive_formd_lookup.ksh | 2 +- .../synctask_core/tst.list_user_props.ksh | 2 +- .../synctask_core/tst.terminate_by_signal.ksh | 2 +- .../cli_root/zfs_clone/zfs_clone_010_pos.ksh | 2 +- .../cli_root/zfs_copies/zfs_copies_002_pos.ksh | 2 +- .../cli_root/zfs_create/zfs_create_011_pos.ksh | 2 +- .../cli_root/zfs_destroy/zfs_destroy_001_pos.ksh | 2 +- .../cli_root/zfs_destroy/zfs_destroy_005_neg.ksh | 4 ++-- 
.../cli_root/zfs_destroy/zfs_destroy_014_pos.ksh | 4 ++-- 14 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c index b9915d5d31e..458d6d8e402 100644 --- a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c +++ b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c @@ -43,7 +43,7 @@ * is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous * txg "n" completed. * 3. context #1 call uiomove to write, however page fault is occurred in - * uiomove, which means it need mm_sem, but mm_sem is hold by + * uiomove, which means it needs mm_sem, but mm_sem is hold by * context #2, so it stuck and can't complete, then txg "n" will not * complete. * diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index ca8807e82c6..87500e92a39 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -131,7 +131,7 @@ function is_loop_device #disk } # -# Check if the given device is a multipath device and if there is a sybolic +# Check if the given device is a multipath device and if there is a symbolic # link to a device mapper and to a disk # Currently no support for dm devices alone without multipath # diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 10949760081..5281c89effb 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -856,7 +856,7 @@ function zero_partitions # # Size should be specified with units as per # the `format` command requirements eg. 100mb 3gb # -# NOTE: This entire interface is problematic for the Linux parted utilty +# NOTE: This entire interface is problematic for the Linux parted utility # which requires the end of the partition to be specified. It would be # best to retire this interface and replace it with something more flexible. # At the moment a best effort is made. 
@@ -1059,7 +1059,7 @@ function partition_disk # # dirnum: the maximum number of subdirectories to use, -1 no limit # filenum: the maximum number of files per subdirectory # bytes: number of bytes to write -# num_writes: numer of types to write out bytes +# num_writes: number of types to write out bytes # data: the data that will be written # # E.g. @@ -2846,7 +2846,7 @@ function labelvtoc # # check if the system was installed as zfsroot or not -# return: 0 ture, otherwise false +# return: 0 if zfsroot, non-zero if not # function is_zfsroot { diff --git a/tests/zfs-tests/tests/functional/acl/acl_common.kshlib b/tests/zfs-tests/tests/functional/acl/acl_common.kshlib index a81cd76ba6a..ba08bcb48be 100644 --- a/tests/zfs-tests/tests/functional/acl/acl_common.kshlib +++ b/tests/zfs-tests/tests/functional/acl/acl_common.kshlib @@ -34,7 +34,7 @@ # # Get the given file/directory access mode # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_mode # { @@ -49,7 +49,7 @@ function get_mode # # # Get the given file/directory ACL # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_acl # { @@ -64,7 +64,7 @@ function get_acl # # # Get the given file/directory ACL # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_compact_acl # { @@ -243,12 +243,12 @@ function usr_exec # [...] # # Count how many ACEs for the specified file or directory. # -# $1 file or directroy name +# $1 file or directory name # function count_ACE # { if [[ ! -e $1 ]]; then - log_note "Need input file or directroy name." + log_note "Need input file or directory name." 
return 1 fi @@ -399,7 +399,7 @@ function rwx_node #user node acl_spec|access # # Get the given file/directory xattr # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_xattr # { diff --git a/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh b/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh index e0b81e16627..841b141e16f 100755 --- a/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh @@ -38,9 +38,9 @@ # Verify set, export and destroy when cachefile is set on pool. # # STRATEGY: -# 1. Create two pools with one same cahcefile1. +# 1. Create two pools with one same cachefile1. # 2. Set cachefile of the two pools to another same cachefile2. -# 3. Verify cachefile1 not exist. +# 3. Verify cachefile1 does not exist. # 4. Export the two pools. # 5. Verify cachefile2 not exist. # 6. Import the two pools and set cachefile to cachefile2. diff --git a/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh b/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh index d28431300a3..1ef9d2756fc 100755 --- a/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh +++ b/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh @@ -19,7 +19,7 @@ # DESCRIPTION: # For the filesystem with casesensitivity=insensitive, normalization=formD, -# check that lookup succeds using any name form. +# check that lookup succeeds using any name form. 
# # STRATEGY: # For each c/n name form: diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh index 910dddc03f7..2f5d214ebbf 100755 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh @@ -95,4 +95,4 @@ log_must_program $TESTPOOL - <<-EOF return 0 EOF -log_pass "Listing zfs user properies should work correctly." +log_pass "Listing zfs user properties should work correctly." diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh index 6f58cc1f4f8..74889eba805 100755 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh @@ -90,7 +90,7 @@ snap_count=$(zfs list -t snapshot | grep $TESTPOOL | wc -l) log_note "$snap_count snapshots created by ZCP" if [ "$snap_count" -eq 0 ]; then - log_fail "Channel progam failed to run." + log_fail "Channel program failed to run." elif [ "$snap_count" -gt 50 ]; then log_fail "Too many snapshots after a cancel ($snap_count)." 
else diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh index 40cabf649d1..62a755eaeef 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh @@ -150,7 +150,7 @@ log_note "Verify zfs clone property for multiple clones" names=$(zfs list -rt all -o name $TESTPOOL) log_must verify_clones 3 0 -log_note "verfify clone property for clone deletion" +log_note "verify clone property for clone deletion" i=1 for ds in $datasets; do log_must zfs destroy $ds/$TESTCLONE.$i diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh index a5a9729dc17..11265cd5afe 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh @@ -92,7 +92,7 @@ for val in 1 2 3; do check_used $used $val done -log_note "Verify df(1M) can corectly display the space charged." +log_note "Verify df(1M) can correctly display the space charged." for val in 1 2 3; do used=`df -F zfs -k /$TESTPOOL/fs_$val/$FILE | grep $TESTPOOL/fs_$val \ | awk '{print $3}'` diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh index 0144b050d7d..982a4ea16b5 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh @@ -33,7 +33,7 @@ # # DESCRIPTION: -# 'zfs create -p' should work as expecteed +# 'zfs create -p' should work as expected # # STRATEGY: # 1. 
To create $newdataset with -p option, first make sure the upper level diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh index 534c33f0a02..26857d48d48 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh @@ -53,7 +53,7 @@ verify_runnable "both" # run 'zfs destroy $opt '. 3rd, check the system status. # # $1 option of 'zfs destroy' -# $2 dataset will be destroied. +# $2 dataset will be destroyed. # function test_n_check { diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh index 2e4a0c3b2bb..1c5b2cf1c74 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh @@ -145,8 +145,8 @@ if is_global_zone; then check_dataset datasetexists $CTR $VOL check_dataset datasetnonexists $VOLSNAP $VOLCLONE - # Due to recusive destroy being a best-effort operation, - # all of the non-busy datasets bellow should be gone now. + # Due to recursive destroy being a best-effort operation, + # all of the non-busy datasets below should be gone now. check_dataset datasetnonexists $FS $FSSNAP $FSCLONE fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh index df7cfcf5271..58c4cfb5646 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh @@ -24,7 +24,7 @@ # # DESCRIPTION: # 'zfs destroy -R ' can destroy all the child -# snapshots and preserves all the nested datasetss. 
+# snapshots and preserves all the nested datasets. # # STRATEGY: # 1. Create nested datasets in the storage pool. @@ -57,7 +57,7 @@ for ds in $datasets; do datasetexists $ds || log_fail "Create $ds dataset fail." done -# create recursive nestedd snapshot +# create recursive nested snapshot log_must zfs snapshot -r $TESTPOOL/$TESTFS1@snap for ds in $datasets; do datasetexists $ds@snap || log_fail "Create $ds@snap snapshot fail." From 4ff90260c0f8185fe14d2d448c1e1af345029119 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Tue, 3 Sep 2019 03:17:39 +0200 Subject: [PATCH 210/325] Fix typos Reviewed-by: Ryan Moeller Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Andrea Gelmini Closes #9251 --- .github/CONTRIBUTING.md | 2 +- scripts/kmodtool | 6 +++--- tests/README.md | 2 +- tests/test-runner/bin/Makefile.am | 2 +- tests/test-runner/bin/test-runner.py | 4 ++-- tests/test-runner/man/test-runner.1 | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 004711ae78c..2b47d458c1a 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -51,7 +51,7 @@ configure option should be set. This will enable additional correctness checks and all the ASSERTs to help quickly catch potential issues. In addition, there are numerous utilities and debugging files which -provide visibility in to the inner workings of ZFS. The most useful +provide visibility into the inner workings of ZFS. The most useful of these tools are discussed in detail on the [debugging ZFS wiki page](https://github.com/zfsonlinux/zfs/wiki/Debugging). 
diff --git a/scripts/kmodtool b/scripts/kmodtool index 9298d6d27df..b928c928620 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -409,7 +409,7 @@ print_rpmtemplate () # and print it and some other required stuff as macro print_rpmtemplate_header - # now print the packages itselfs + # now print the packages for kernel in ${kernel_versions_to_build_for} ; do local kernel_verrelarch=${kernel%%${kernels_known_variants}} @@ -501,7 +501,7 @@ while [ "${1}" ] ; do --obsolete-name) shift if [[ ! "${1}" ]] ; then - error_out 2 "Please provide the name of the kmod to obsolte together with --obsolete-name" >&2 + error_out 2 "Please provide the name of the kmod to obsolete together with --obsolete-name" >&2 fi obsolete_name="${1}" shift @@ -509,7 +509,7 @@ while [ "${1}" ] ; do --obsolete-version) shift if [[ ! "${1}" ]] ; then - error_out 2 "Please provide the version of the kmod to obsolte together with --obsolete-version" >&2 + error_out 2 "Please provide the version of the kmod to obsolete together with --obsolete-version" >&2 fi obsolete_version="${1}" shift diff --git a/tests/README.md b/tests/README.md index 7b3768c2911..b2c7f99c709 100644 --- a/tests/README.md +++ b/tests/README.md @@ -78,7 +78,7 @@ The following zfs-tests.sh options are supported: when test-runner exists. This is useful when the results of a specific test need to be preserved for further analysis. - -f Use sparse files directly instread of loopback devices for + -f Use sparse files directly instead of loopback devices for the testing. When running in this mode certain tests will be skipped which depend on real block devices. diff --git a/tests/test-runner/bin/Makefile.am b/tests/test-runner/bin/Makefile.am index 30c564e5553..e1ae21548e9 100644 --- a/tests/test-runner/bin/Makefile.am +++ b/tests/test-runner/bin/Makefile.am @@ -3,7 +3,7 @@ dist_pkgdata_SCRIPTS = \ test-runner.py \ zts-report.py # -# These scripts are compatibile with both Python 2.6 and 3.4. 
As such the +# These scripts are compatible with both Python 2.6 and 3.4. As such the # python 3 shebang can be replaced at install time when targeting a python # 2 system. This allows us to maintain a single version of the source. # diff --git a/tests/test-runner/bin/test-runner.py b/tests/test-runner/bin/test-runner.py index 4d4fd96ad77..bf2c77c18a9 100755 --- a/tests/test-runner/bin/test-runner.py +++ b/tests/test-runner/bin/test-runner.py @@ -307,7 +307,7 @@ def log(self, options): This function is responsible for writing all output. This includes the console output, the logfile of all results (with timestamped merged stdout and stderr), and for each test, the unmodified - stdout/stderr/merged in it's own file. + stdout/stderr/merged in its own file. """ logname = getpwuid(os.getuid()).pw_name @@ -716,7 +716,7 @@ def complete_outputdirs(self): def setup_logging(self, options): """ - This funtion creates the output directory and gets a file object + This function creates the output directory and gets a file object for the logfile. This function must be called before write_log() can be used. """ diff --git a/tests/test-runner/man/test-runner.1 b/tests/test-runner/man/test-runner.1 index 31cd412452b..95255073b70 100644 --- a/tests/test-runner/man/test-runner.1 +++ b/tests/test-runner/man/test-runner.1 @@ -103,7 +103,7 @@ The file has one section named "DEFAULT," which contains configuration option names and their values in "name = value" format. The values in this section apply to all the subsequent sections, unless they are also specified there, in which case the default is overridden. The remaining section names are the -absolute pathnames of files and direcotries, describing tests and test groups +absolute pathnames of files and directories, describing tests and test groups respectively. The legal option names are: .sp .ne 2 @@ -248,7 +248,7 @@ Run \fIscript\fR after any test or test group. 
\fB-q\fR .ad .RS 6n -Print only the results sumary to the standard output. +Print only the results summary to the standard output. .RE .ne 2 From 7e9391730927d9d916471d84b550d6e033fff922 Mon Sep 17 00:00:00 2001 From: George Wilson Date: Mon, 2 Sep 2019 22:17:51 -0400 Subject: [PATCH 211/325] maxinflight can overflow in spa_load_verify_cb() When running on larger memory systems, we can overflow the value of maxinflight. This can result in maxinflight having a value of 0 causing the system to hang. Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: George Wilson Closes #9272 --- module/zfs/spa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7b2a2081fd0..7a20330c187 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -2137,7 +2137,8 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (!BP_IS_METADATA(bp) && !spa_load_verify_data) return (0); - int maxinflight_bytes = arc_target_bytes() >> spa_load_verify_shift; + uint64_t maxinflight_bytes = + arc_target_bytes() >> spa_load_verify_shift; zio_t *rio = arg; size_t size = BP_GET_PSIZE(bp); From 068c5495f07f5387300b2b2dd59cd2bee11a2685 Mon Sep 17 00:00:00 2001 From: Igor K Date: Tue, 3 Sep 2019 20:46:41 +0300 Subject: [PATCH 212/325] ZTS: Fix removal_cancel.ksh Create a larger file to extend the time required to perform the removal. Occasional failures were observed due to the removal completing before the cancel could be requested. 
Reviewed-by: George Melikov Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Igor Kozhukhov Closes #9259 --- tests/zfs-tests/tests/functional/removal/removal_cancel.ksh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh b/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh index e7fa6abb8bc..afb318ef216 100755 --- a/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh +++ b/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh @@ -55,11 +55,11 @@ log_must default_setup_noexit "$REMOVEDISK" # # Create a file of size 1GB and then do some random writes. -# Since randwritecomp does 8K writes we do 12500 writes -# which means we write ~100MB to the vdev. +# Since randwritecomp does 8K writes we do 25000 writes +# which means we write ~200MB to the vdev. # log_must mkfile -n 1g $SAMPLEFILE -log_must randwritecomp $SAMPLEFILE 12500 +log_must randwritecomp $SAMPLEFILE 25000 # # Add second device where all the data will be evacuated. From 5cb46afcf1f945e77bba71e590e5f002173a9618 Mon Sep 17 00:00:00 2001 From: Igor K Date: Tue, 3 Sep 2019 22:12:31 +0300 Subject: [PATCH 213/325] Fix panic on DilOS with kstat per dataset statistics Account for ZFS_MAX_DATASET_NAME_LEN in kstat data size. This value is ignored in the Linux kstat code but resolves the issue for other platforms. 
Reviewed-by: Serapheim Dimitropoulos Reviewed-by: Brian Behlendorf Signed-off-by: Igor Kozhukhov Closes #9254 Closes #9151 --- module/zfs/dataset_kstats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c index 522825c42cc..e46a0926d55 100644 --- a/module/zfs/dataset_kstats.c +++ b/module/zfs/dataset_kstats.c @@ -135,6 +135,7 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) kstat->ks_data = dk_kstats; kstat->ks_update = dataset_kstats_update; kstat->ks_private = dk; + kstat->ks_data_size += ZFS_MAX_DATASET_NAME_LEN; kstat_install(kstat); dk->dk_kstats = kstat; From 8c9c049502efcfb4a742afc432e1991ff917ea0a Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Tue, 3 Sep 2019 15:44:08 -0400 Subject: [PATCH 214/325] Use the right booleans TRUE and FALSE happen to be defined, but we should use B_TRUE and B_FALSE for the sake of consistency. Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Ryan Moeller Closes #9264 --- tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c index a59bededb54..ef388eaef47 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c @@ -827,11 +827,11 @@ enum zfs_ioc_ref { boolean_t validate_ioc_values(void) { - boolean_t result = TRUE; + boolean_t result = B_TRUE; #define CHECK(expr) do { \ if (!(expr)) { \ - result = FALSE; \ + result = B_FALSE; \ fprintf(stderr, "(%s) === FALSE\n", #expr); \ } \ } while (0) From 27dda98b88b1db38bcc8c72d2c01c676f0743c42 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 5 Sep 2019 12:51:59 -0400 Subject: [PATCH 215/325] Refactor checksum operations in tests md5sum in particular but also sha256sum to a lesser 
extent is used in several areas of the test suite for computing checksums. The vast majority of invocations are followed by `| awk '{ print $1 }'`. Introduce functions to wrap up `md5sum $file | awk '{ print $1 }'` and likewise for sha256sum. These also serve as a convenient interface for alternative implementations on other platforms. Reviewed-by: Igor Kozhukhov Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9280 --- tests/zfs-tests/include/libtest.shlib | 22 +++++++++++++++++++ .../zfs_receive_from_encrypted.ksh | 6 ++--- .../cli_root/zfs_receive/zfs_receive_raw.ksh | 8 +++---- .../zfs_receive_raw_incremental.ksh | 4 ++-- .../import_cachefile_shared_device.ksh | 2 +- .../cli_root/zpool_import/zpool_import.kshlib | 20 ++++++++++++----- .../zpool_reopen/zpool_reopen_003_pos.ksh | 9 ++++---- .../functional/history/history_003_pos.ksh | 6 ++--- .../tests/functional/rsend/rsend.kshlib | 11 +++------- .../tests/functional/rsend/send-c_volume.ksh | 10 ++++----- .../rsend/send-wDR_encrypted_zvol.ksh | 4 ++-- .../functional/rsend/send_encrypted_props.ksh | 12 +++++----- .../functional/slog/slog_replay_fs_001.ksh | 14 +++++------- .../functional/slog/slog_replay_volume.ksh | 6 +++-- 14 files changed, 76 insertions(+), 58 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 5281c89effb..bc76f09a03c 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3562,3 +3562,25 @@ function mdb_ctf_set_int return 0 } + +# +# Compute MD5 digest for given file or stdin if no file given. +# Note: file path must not contain spaces +# +function md5digest +{ + typeset file=$1 + + md5sum -b $file | awk '{ print $1 }' +} + +# +# Compute SHA256 digest for given file or stdin if no file given. 
+# Note: file path must not contain spaces +# +function sha256digest +{ + typeset file=$1 + + sha256sum -b $file | awk '{ print $1 }' +} diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh index de771ccf395..a1d094bdb4b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh @@ -59,7 +59,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ "-o keyformat=passphrase $TESTPOOL/$TESTFS2" log_must mkfile 1M /$TESTPOOL/$TESTFS2/$TESTFILE0 -typeset checksum=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +typeset checksum=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) log_must zfs snapshot $snap @@ -69,14 +69,14 @@ log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" crypt=$(get_prop encryption $TESTPOOL/$TESTFS1/c1) [[ "$crypt" == "off" ]] || log_fail "Received unencrypted stream as encrypted" -typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | awk '{ print $1 }') +typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" log_note "Verify ZFS can receive into an encrypted child" log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS2/c1" -typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS2/c1/$TESTFILE0 | awk '{ print $1 }') +typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS2/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh index e2e2c5f010f..7d5606acea0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh +++ 
b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -60,8 +60,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ "-o keyformat=passphrase $TESTPOOL/$TESTFS1" log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 -typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | \ - awk '{ print $1 }') +typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap @@ -74,7 +73,7 @@ keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS2) log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" @@ -85,8 +84,7 @@ keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS1/c1) log_fail "Expected keystatus unavailable, got $keystatus" log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/c1" -typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | \ - awk '{ print $1 }') +typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh index 1e91c6262c4..c52a12e78ac 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh @@ -69,7 +69,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ log_must zfs snapshot $snap1 log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 -typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | awk '{ print $1 }') +typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap2 @@ -89,7 +89,7 @@ log_must zfs 
unload-key $TESTPOOL/$TESTFS2 log_must eval "zfs receive $TESTPOOL/$TESTFS2 < $ibackup" log_must eval "echo $passphrase2 | zfs mount -l $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh index 23d79c69075..887993dfd1e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh @@ -50,7 +50,7 @@ function dev_checksum log_note "Compute checksum of '$dev'" - checksum=$(md5sum $dev) + checksum=$(md5digest $dev) if [[ $? -ne 0 ]]; then log_fail "Failed to compute checksum of '$dev'" return 1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib index d050145e44f..c365ec4adb2 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib @@ -79,10 +79,10 @@ function write_some_data # # Create/overwrite a few datasets with files. -# Apply md5sum on all the files and store checksums in a file. +# Checksum all the files and store digests in a file. # # newdata: overwrite existing files if false. 
-# md5file: file where to store md5sums +# md5file: file where to store md5 digests # datasetname: base name for datasets # function _generate_data_common @@ -102,7 +102,10 @@ function _generate_data_common for j in {1..$files}; do typeset file="/$pool/$datasetname$i/file$j" dd if=/dev/urandom of=$file bs=128k count=$blocks > /dev/null - [[ -n $md5file ]] && md5sum $file >> $md5file + if [[ -n $md5file ]]; then + typeset cksum=$(md5digest $file) + echo $cksum $file >> $md5file + fi done ( $newdata ) && sync_pool "$pool" done @@ -140,8 +143,15 @@ function verify_data_md5sums return 1 fi - md5sum -c --quiet $md5file - return $? + cat $md5file | \ + while read digest file; do + typeset digest1=$(md5digest $file) + if [[ "$digest1" != "$digest" ]]; then + return 1 + fi + done + + return 0 } # diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh index 6ac74881846..097dd3c71d1 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh @@ -42,7 +42,6 @@ verify_runnable "global" function cleanup { log_must zinject -c all - rm -f $TESTFILE_MD5 2>/dev/null # bring back removed disk online for further tests insert_disk $REMOVED_DISK $scsi_host poolexists $TESTPOOL && destroy_pool $TESTPOOL @@ -64,9 +63,8 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "unavail" # 3. Write a test file to the pool and calculate its checksum. TESTFILE=/$TESTPOOL/data -TESTFILE_MD5=$(mktemp --tmpdir=/var/tmp) log_must generate_random_file /$TESTPOOL/data $LARGE_FILE_SIZE -log_must md5sum $TESTFILE > $TESTFILE_MD5 +TESTFILE_MD5=$(md5digest $TESTFILE) # 4. Execute scrub. # add delay to I/O requests for remaining disk in pool @@ -90,12 +88,13 @@ log_must is_scan_restarted $TESTPOOL # 8. 
Put another device offline and check if the test file checksum is correct. log_must zpool offline $TESTPOOL $DISK2 -log_must md5sum -c $TESTFILE_MD5 +CHECK_MD5=$(md5digest $TESTFILE) +[[ $CHECK_MD5 == $TESTFILE_MD5 ]] || \ + log_fail "Checksums differ ($CHECK_MD5 != $TESTFILE_MD5)" log_must zpool online $TESTPOOL $DISK2 sleep 1 # clean up -rm -f $TESTFILE_MD5 2>/dev/null log_must zpool destroy $TESTPOOL log_pass "Zpool reopen test successful" diff --git a/tests/zfs-tests/tests/functional/history/history_003_pos.ksh b/tests/zfs-tests/tests/functional/history/history_003_pos.ksh index 4ecee3ba0c5..46af53f8af9 100755 --- a/tests/zfs-tests/tests/functional/history/history_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_003_pos.ksh @@ -65,9 +65,7 @@ log_must zpool create $spool $VDEV0 log_must zfs create $spool/$sfs typeset -i orig_count=$(zpool history $spool | wc -l) -typeset orig_md5=$(zpool history $spool | head -2 | md5sum | \ - awk '{print $1}') - +typeset orig_md5=$(zpool history $spool | head -2 | md5digest) typeset -i i=0 while ((i < 300)); do zfs set compression=off $spool/$sfs @@ -82,7 +80,7 @@ done TMPFILE=$TEST_BASE_DIR/spool.$$ zpool history $spool >$TMPFILE typeset -i entry_count=$(wc -l $TMPFILE | awk '{print $1}') -typeset final_md5=$(head -2 $TMPFILE | md5sum | awk '{print $1}') +typeset final_md5=$(head -2 $TMPFILE | md5digest) grep 'zpool create' $TMPFILE >/dev/null 2>&1 || log_fail "'zpool create' was not found in pool history" diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index f51786083f3..7f88f55a0e8 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -158,14 +158,9 @@ function cmp_md5s { typeset file1=$1 typeset file2=$2 - eval md5sum $file1 | awk '{ print $1 }' > $BACKDIR/md5_file1 - eval md5sum $file2 | awk '{ print $1 }' > $BACKDIR/md5_file2 - diff $BACKDIR/md5_file1 
$BACKDIR/md5_file2 - typeset -i ret=$? - - rm -f $BACKDIR/md5_file1 $BACKDIR/md5_file2 - - return $ret + typeset sum1=$(md5digest $file1) + typeset sum2=$(md5digest $file2) + test "$sum1" = "$sum2" } # diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh index caaf07ccb7a..988ed91b991 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh @@ -49,8 +49,8 @@ typeset megs=8 log_must zfs create -V 256m -o compress=lz4 $vol write_compressible $BACKDIR ${megs}m 2 -md5_1=$(md5sum $data1 | awk '{print $1}') -md5_2=$(md5sum $data2 | awk '{print $1}') +md5_1=$(md5digest $data1) +md5_2=$(md5digest $data2) log_must dd if=$data1 of=$voldev bs=1024k log_must zfs snapshot $vol@snap @@ -60,8 +60,7 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/full" verify_stream_size $BACKDIR/full $vol verify_stream_size $BACKDIR/full $vol2 -md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5sum | \ - awk '{print $1}') +md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest) [[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1" # Repeat, for an incremental send @@ -73,8 +72,7 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/inc" verify_stream_size $BACKDIR/inc $vol 90 $vol@snap verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap -md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5sum | \ - awk '{print $1}') +md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest) [[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2" log_pass "Verify compressed send works with volumes" diff --git a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh index 443887bfa23..0a7ae74822d 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh +++ 
b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh @@ -86,8 +86,8 @@ block_device_wait log_must mount $recvdev $recvmnt -md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}') -md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}') +md5_1=$(cat $mntpnt/* | md5digest) +md5_2=$(cat $recvmnt/* | md5digest) [[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2" log_pass "zfs can receive raw, recursive, and deduplicated send streams" diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh index a216f1c5ff7..4c90ba95bf9 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh @@ -75,7 +75,7 @@ log_must zfs create -o keyformat=passphrase -o keylocation=file://$keyfile \ log_must mkfile 1M /$TESTPOOL/ds/$TESTFILE0 log_must cp /$TESTPOOL/ds/$TESTFILE0 /$TESTPOOL/crypt/$TESTFILE0 -typeset cksum=$(md5sum /$TESTPOOL/ds/$TESTFILE0 | awk '{ print $1 }') +typeset cksum=$(md5digest /$TESTPOOL/ds/$TESTFILE0) log_must zfs snap -r $snap log_must zfs snap -r $esnap @@ -127,7 +127,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -143,7 +143,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs 
destroy -r $ds @@ -161,7 +161,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -175,7 +175,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -189,7 +189,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 3e5bccd2ef1..8954caa1c93 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -58,14 +58,8 @@ verify_runnable "global" -function cleanup_fs -{ - rm -f $TESTDIR/checksum - cleanup -} - log_assert "Replay of intent log succeeds." 
-log_onexit cleanup_fs +log_onexit cleanup log_must setup # @@ -115,7 +109,7 @@ log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete # Create a simple validation payload log_must mkdir -p $TESTDIR log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8 -log_must eval "sha256sum -b /$TESTPOOL/$TESTFS/payload >$TESTDIR/checksum" +typeset checksum=$(sha256digest /$TESTPOOL/$TESTFS/payload) # TX_WRITE (small file with ordering) log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file @@ -210,6 +204,8 @@ log_note "Verify working set diff:" log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy log_note "Verify file checksum:" -log_must sha256sum -c $TESTDIR/checksum +typeset checksum1=$(sha256digest /$TESTPOOL/$TESTFS/payload) +[[ "$checksum1" == "$checksum" ]] || \ + log_fail "checksum mismatch ($checksum1 != $checksum)" log_pass "Replay of intent log succeeds." diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh index a72c83b5bfc..f513d04fe18 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh @@ -128,7 +128,7 @@ fi # # 4. Generate checksums for all ext4 files. # -log_must sha256sum -b $MNTPNT/* >$TESTDIR/checksum +typeset checksum=$(cat $MNTPNT/* | sha256digest) # # 5. Unmount filesystem and export the pool @@ -160,6 +160,8 @@ log_note "Verify current block usage:" log_must zdb -bcv $TESTPOOL log_note "Verify checksums" -log_must sha256sum -c $TESTDIR/checksum +typeset checksum1=$(cat $MNTPNT/* | sha256digest) +[[ "$checksum1" == "$checksum" ]] || \ + log_fail "checksum mismatch ($checksum1 != $checksum)" log_pass "Replay of intent log succeeds." From cea50025fdeb20416fe05e863914c385adc184ed Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 5 Sep 2019 19:20:09 -0400 Subject: [PATCH 216/325] Clean up zfs_clone_010_pos Remove a lot of unnecessary setting and incrementing of `i`. 
Remove unused variable `j`. Instead of calling out to Python in a loop to generate the same string repeatedly, generate the string once using shell constructs before entering the loop. Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Reviewed-by: Richard Elling Signed-off-by: Ryan Moeller Closes #9284 --- .../cli_root/zfs_clone/zfs_clone_010_pos.ksh | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh index 62a755eaeef..dcf80095db2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh @@ -143,7 +143,6 @@ datasets="$TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1/$TESTFS2 typeset -a d_clones typeset -a deferred_snaps typeset -i i -i=1 log_must setup_ds log_note "Verify zfs clone property for multiple clones" @@ -157,19 +156,16 @@ for ds in $datasets; do ((i=i+1)) done names=$(zfs list -rt all -o name $TESTPOOL) -i=1 log_must verify_clones 2 1 log_must local_cleanup log_must setup_ds log_note "verify zfs deferred destroy on clones property" -i=1 names=$(zfs list -rt all -o name $TESTPOOL) for ds in $datasets; do log_must zfs destroy -d $ds@snap deferred_snaps=( "${deferred_snaps[@]}" "$ds@snap" ) - ((i=i+1)) done log_must verify_clones 3 0 @@ -206,17 +202,14 @@ for ds in $datasets; do done names=$(zfs list -rt all -o name,clones $TESTPOOL) log_must verify_clones 3 1 $TESTCLONE -i=1 for ds in $datasets; do log_must zfs promote $ds - ((i=i+1)) done log_must local_cleanup log_note "verify clone list truncated correctly" -typeset -i j=200 -i=1 fs=$TESTPOOL/$TESTFS1 +xs=""; for i in {1..200}; do xs+="x"; done if is_linux; then ZFS_MAXPROPLEN=4096 else @@ -224,10 +217,8 @@ else fi log_must zfs create $fs log_must zfs snapshot $fs@snap -while((i <= $(( ZFS_MAXPROPLEN/200+1 )))); do - 
log_must zfs clone $fs@snap $fs/$TESTCLONE$(python -c 'print "x" * 200').$i - ((i=i+1)) - ((j=j+200)) +for (( i = 1; i <= (ZFS_MAXPROPLEN / 200 + 1); i++ )); do + log_must zfs clone ${fs}@snap ${fs}/${TESTCLONE}${xs}.${i} done clone_list=$(zfs list -o clones $fs@snap) char_count=$(echo "$clone_list" | tail -1 | wc | awk '{print $3}') From ffe29e7e3d261189c0a972062964200b9ac87525 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 5 Sep 2019 19:22:05 -0400 Subject: [PATCH 217/325] Fix noop receive of raw send stream Currently, the noop receive code fails to work with raw send streams and resuming send streams. This happens because zfs_receive_impl() reads the DRR_BEGIN payload without reading the payload itself. Normally, the kernel expects to read this itself, but in this case the recv_skip() code runs instead and it is not prepared to handle the stream being left at any place other than the beginning of a record. This patch resolves this issue by manually reading the DRR_BEGIN payload in the dry-run case. This patch also includes a number of small fixups in this code path. Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Reviewed-by: Paul Dagnelie Signed-off-by: Tom Caputi Closes #9221 Closes #9173 --- lib/libzfs/libzfs_sendrecv.c | 39 ++++++++++++++++--- .../cli_root/zfs_receive/zfs_receive_raw.ksh | 3 ++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 12a4b500ed1..20a59ef6cff 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -3438,10 +3438,11 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); + uint64_t payload_size; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive:")); + "cannot receive")); /* XXX would be great to use lseek if possible... 
*/ drr = buf; @@ -3468,9 +3469,14 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) drr->drr_u.drr_object.drr_bonuslen = BSWAP_32(drr->drr_u.drr_object. drr_bonuslen); + drr->drr_u.drr_object.drr_raw_bonuslen = + BSWAP_32(drr->drr_u.drr_object. + drr_raw_bonuslen); } - (void) recv_read(hdl, fd, buf, - P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), + + payload_size = + DRR_OBJECT_PAYLOAD_SIZE(&drr->drr_u.drr_object); + (void) recv_read(hdl, fd, buf, payload_size, B_FALSE, NULL); break; @@ -3483,7 +3489,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) BSWAP_64( drr->drr_u.drr_write.drr_compressed_size); } - uint64_t payload_size = + payload_size = DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write); (void) recv_read(hdl, fd, buf, payload_size, B_FALSE, NULL); @@ -3492,9 +3498,15 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) if (byteswap) { drr->drr_u.drr_spill.drr_length = BSWAP_64(drr->drr_u.drr_spill.drr_length); + drr->drr_u.drr_spill.drr_compressed_size = + BSWAP_64(drr->drr_u.drr_spill. + drr_compressed_size); } - (void) recv_read(hdl, fd, buf, - drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); + + payload_size = + DRR_SPILL_PAYLOAD_SIZE(&drr->drr_u.drr_spill); + (void) recv_read(hdl, fd, buf, payload_size, + B_FALSE, NULL); break; case DRR_WRITE_EMBEDDED: if (byteswap) { @@ -4232,6 +4244,21 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } if (flags->dryrun) { + void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); + + /* + * We have read the DRR_BEGIN record, but we have + * not yet read the payload. For non-dryrun sends + * this will be done by the kernel, so we must + * emulate that here, before attempting to read + * more records. 
+ */ + err = recv_read(hdl, infd, buf, drr->drr_payloadlen, + flags->byteswap, NULL); + free(buf); + if (err != 0) + goto out; + err = recv_skip(hdl, infd, flags->byteswap); goto out; } diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh index 7d5606acea0..9740caf7250 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -36,6 +36,7 @@ # 9. Verify the key is unavailable # 10. Attempt to load the key and mount the dataset # 11. Verify the checksum of the file is the same as the original +# 12. Verify 'zfs receive -n' works with the raw stream # verify_runnable "both" @@ -88,4 +89,6 @@ typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" +log_must eval "zfs send -w $snap | zfs receive -n $TESTPOOL/$TESTFS3" + log_pass "ZFS can receive streams from raw sends" From 4818563f85bf825df92ebc2ac1076443ad7af036 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Mon, 9 Sep 2019 19:04:05 -0400 Subject: [PATCH 218/325] Clean up do_vol_test in zfs_copies tests Get rid of the `get_used_prop` function. `get_prop used` works fine. Fix the comment describing the function parameters. The type does not have a default, and mntp is also used for ext2. Rename the variable for the number of copies from `copy` to `copies`. Use a `case` statement to match the type parameter, order the cases alphabetically, and add a little sanity checking for good measure. Use eval to make sure the output of commands is silenced rather than the log messages when redirecting output to /dev/null. Simplify cases where zfs requires special behavior. Don't allow the test to loop forever in the event space usage does not change. 
Bail out of the loop and fail after an arbitrary number of iterations. Add more information to the log message when the test fails, to help debugging. Reviewed-by: John Kennedy Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9286 --- .../cli_root/zfs_copies/zfs_copies.kshlib | 77 +++++++++---------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib index a86b2f78f86..b0ced58c9f7 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib @@ -49,19 +49,6 @@ function cmp_prop fi } -# -# Get the value of property used via zfs list -# $1, the dataset name -# -function get_used_prop -{ - typeset ds=$1 - typeset used - - used=`zfs list -H -p -o used $ds` - echo $used -} - # # Check the used space is charged correctly # $1, the number of used space @@ -85,64 +72,72 @@ function check_used # # test ncopies on volume -# $1 test type zfs|ufs, default zfs +# $1 test type zfs|ufs|ext2 # $2 copies -# $3 mntp for ufs test +# $3 mntp for ufs|ext2 test function do_vol_test { typeset type=$1 - typeset copy=$2 + typeset copies=$2 typeset mntp=$3 vol=$TESTPOOL/$TESTVOL1 vol_b_path=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL1 vol_r_path=$ZVOL_RDEVDIR/$TESTPOOL/$TESTVOL1 - log_must zfs create -V $VOLSIZE -o copies=$copy $vol + log_must zfs create -V $VOLSIZE -o copies=$copies $vol log_must zfs set refreservation=none $vol block_device_wait - if [[ $type == "ufs" ]]; then - log_must echo y | newfs $vol_r_path >/dev/null 2>&1 - log_must mount -F ufs -o rw $vol_b_path $mntp - elif [[ $type == "ext2" ]]; then - log_must echo y | newfs $vol_r_path >/dev/null 2>&1 + case "$type" in + "ext2") + log_must eval "echo y | newfs $vol_r_path >/dev/null 2>&1" log_must mount -o rw $vol_b_path $mntp - else + ;; + "ufs") + if is_linux; then + 
log_unsupported "ufs test not implemented for linux" + fi + log_must eval "newfs $vol_r_path >/dev/null 2>&1" + log_must mount $vol_b_path $mntp + ;; + "zfs") log_must zpool create $TESTPOOL1 $vol_b_path log_must zfs create $TESTPOOL1/$TESTFS1 - fi - - ((nfilesize = copy * ${FILESIZE%m})) - pre_used=$(get_used_prop $vol) + ;; + *) + log_unsupported "$type test not implemented" + ;; + esac + + ((nfilesize = copies * ${FILESIZE%m})) + pre_used=$(get_prop used $vol) ((target_size = pre_used + nfilesize)) - if [[ $type == "ufs" ]]; then - log_must mkfile $FILESIZE $mntp/$FILE - elif [[ $type == "ext2" ]]; then - log_must mkfile $FILESIZE $mntp/$FILE - else + if [[ $type == "zfs" ]]; then log_must mkfile $FILESIZE /$TESTPOOL1/$TESTFS1/$FILE + else + log_must mkfile $FILESIZE $mntp/$FILE fi - post_used=$(get_used_prop $vol) - while ((post_used < target_size)) ; do + post_used=$(get_prop used $vol) + ((retries = 0)) + while ((post_used < target_size && retries++ < 42)); do sleep 1 - post_used=$(get_used_prop $vol) + post_used=$(get_prop used $vol) done ((used = post_used - pre_used)) if ((used < nfilesize)); then log_fail "The space is not charged correctly while setting" \ - "copies as $copy" + "copies as $copies ($used < $nfilesize)" \ + "pre=${pre_used} post=${post_used}" fi - if [[ $type == "ufs" ]]; then - umount $mntp - elif [[ $type == "ext2" ]]; then - umount $mntp - else + if [[ $type == "zfs" ]]; then log_must zpool destroy $TESTPOOL1 + else + log_must umount $mntp fi log_must zfs destroy $vol From 43258fb78c4a20a46dc22ef99281b8f9b634916c Mon Sep 17 00:00:00 2001 From: John Wren Kennedy Date: Mon, 9 Sep 2019 17:11:07 -0600 Subject: [PATCH 219/325] ZTS: Introduce targeted corruption in file blocks filetest_001_pos verifies that various checksum algorithms detect corruption by overwriting the underlying vdev on which a file resides. It is possible for the overwrite to miss the blocks of a file, causing a spurious failure. 
This change introduces a function to corrupt the individual blocks of a file as determined by zdb. Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: John Kennedy Closes #9288 --- tests/zfs-tests/include/blkdev.shlib | 88 ++++++++++++++++++- .../functional/checksum/filetest_001_pos.ksh | 27 ++---- 2 files changed, 96 insertions(+), 19 deletions(-) diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 87500e92a39..af3324683b0 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -12,7 +12,7 @@ # # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright (c) 2012, 2019 by Delphix. All rights reserved. # Copyright 2016 Nexenta Systems, Inc. # Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved. # Copyright (c) 2017 Lawrence Livermore National Security, LLC. @@ -465,3 +465,89 @@ function get_pool_devices #testpool #devdir fi echo $out } + +# +# Write to standard out giving the level, device name, offset and length +# of all blocks in an input file. The offset and length are in units of +# 512 byte blocks. In the case of mirrored vdevs, only the first +# device is listed, as the levels, blocks and offsets will be the same +# on other devices. Note that this function only works with mirrored +# or non-redundant pools, not raidz. +# +# The output of this function can be used to introduce corruption at +# varying levels of indirection. +# +function list_file_blocks # input_file +{ + typeset input_file=$1 + + [[ -f $input_file ]] || log_fail "Couldn't find $input_file" + + typeset ds="$(zfs list -H -o name $input_file)" + typeset pool="${ds%%/*}" + typeset inum="$(stat -c '%i' $input_file)" + + # + # Establish a mapping between vdev ids as shown in a DVA and the + # pathnames they correspond to in ${VDEV_MAP[]}. 
+ # + eval $(zdb -C $pool | awk ' + BEGIN { + printf("typeset VDEV_MAP\n"); + looking = 0; + } + /^ children/ { + id = $1; + looking = 1; + } + /path: / && looking == 1 { + print id" "$2; + looking = 0; + } + ' | sed -n 's/^children\[\([0-9]\)\]: \(.*\)$/VDEV_MAP[\1]=\2/p') + + # + # The awk below parses the output of zdb, printing out the level + # of each block along with vdev id, offset and length. The last + # two are converted to decimal in the while loop. 4M is added to + # the offset to compensate for the first two labels and boot + # block. Lastly, the offset and length are printed in units of + # 512b blocks for ease of use with dd. + # + log_must zpool sync -f + typeset level path offset length + zdb -ddddd $ds $inum | awk -F: ' + BEGIN { looking = 0 } + /^Indirect blocks:/ { looking = 1} + /^\t\tsegment / { looking = 0} + /L[0-8]/ && looking == 1 { print $0} + ' | sed -n 's/^.*\(L[0-9]\) \([0-9]*\):\([0-9a-f]*\):\([0-9a-f]*\) .*$/\1 \2 \3 \4/p' | \ + while read level path offset length; do + offset=$((16#$offset)) # Conversion from hex + length=$((16#$length)) + offset="$(((offset + 4 * 1024 * 1024) / 512))" + length="$((length / 512))" + echo "$level ${VDEV_MAP[$path]} $offset $length" + done 2>/dev/null +} + +function corrupt_blocks_at_level # input_file corrupt_level +{ + typeset input_file=$1 + typeset corrupt_level="L${2:-0}" + typeset level path offset length + + [[ -f $input_file ]] || log_fail "Couldn't find $input_file" + + + log_must list_file_blocks $input_file | \ + while read level path offset length; do + if [[ $level = $corrupt_level ]]; then + log_must dd if=/dev/urandom of=$path bs=512 \ + count=$length seek=$offset conv=notrunc + fi + done + + # This is necessary for pools made of loop devices. 
+ sync +} diff --git a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh index ccc60a661d0..27dad072631 100755 --- a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh @@ -21,7 +21,7 @@ # # -# Copyright (c) 2018 by Delphix. All rights reserved. +# Copyright (c) 2018, 2019 by Delphix. All rights reserved. # . $STF_SUITE/include/libtest.shlib @@ -32,8 +32,8 @@ # Sanity test to make sure checksum algorithms work. # For each checksum, create a file in the pool using that checksum. Verify # that there are no checksum errors. Next, for each checksum, create a single -# file in the pool using that checksum, scramble the underlying vdev, and -# verify that we correctly catch the checksum errors. +# file in the pool using that checksum, corrupt the file, and verify that we +# correctly catch the checksum errors. # # STRATEGY: # Test 1 @@ -46,11 +46,9 @@ # Test 2 # 6. For each checksum: # 7. Create a file using the checksum -# 8. Export the pool -# 9. Scramble the data on one of the underlying VDEVs -# 10. Import the pool -# 11. Scrub the pool -# 12. Verify that there are checksum errors +# 8. Corrupt all level 0 blocks in the file +# 9. Scrub the pool +# 10. 
Verify that there are checksum errors verify_runnable "both" @@ -66,8 +64,6 @@ log_assert "Create and read back files with using different checksum algorithms" log_onexit cleanup WRITESZ=1048576 -SKIPCNT=$(((4194304 / $WRITESZ) * 2)) -WRITECNT=$((($MINVDEVSIZE / $WRITESZ) - $SKIPCNT)) # Get a list of vdevs in our pool set -A array $(get_disklist_fullpath) @@ -96,7 +92,7 @@ log_must [ $cksum -eq 0 ] rm -fr $TESTDIR/* -log_assert "Test scrambling the disk and seeing checksum errors" +log_assert "Test corrupting the files and seeing checksum errors" typeset -i j=1 while [[ $j -lt ${#CHECKSUM_TYPES[*]} ]]; do type=${CHECKSUM_TYPES[$j]} @@ -104,14 +100,9 @@ while [[ $j -lt ${#CHECKSUM_TYPES[*]} ]]; do log_must file_write -o overwrite -f $TESTDIR/test_$type \ -b $WRITESZ -c 5 -d R - log_must zpool export $TESTPOOL + # Corrupt the level 0 blocks of this file + corrupt_blocks_at_level $TESTDIR/test_$type - # Scramble the data on the first vdev in our pool. Skip the first - # and last 16MB of data, then scramble the rest after that. - log_must dd if=/dev/zero of=$firstvdev bs=$WRITESZ skip=$SKIPCNT \ - count=$WRITECNT - - log_must zpool import $TESTPOOL log_must zpool scrub $TESTPOOL log_must wait_scrubbed $TESTPOOL From 8747ee451366b248d05f797622cb1773a0deffe3 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Wed, 11 Sep 2019 14:16:48 -0400 Subject: [PATCH 220/325] Fix stalled txg with repeated noop scans Currently, the DSL scan code figures out when it should suspend processing and allow a txg to continue by calling the function dsl_scan_check_suspend(). Unfortunately, this function only allows the scan to suspend at a level 0 block. In the event that the system is scanning a bunch of empty snapshots or a resilver is running with a high enough scn_cur_min_txg, the scan will stop processing each dataset at the root level, deciding it has nothing left to do. 
This means that the check_suspend function is never called and the txg remains stuck until a dataset is found that has data to scan. This patch fixes the problem by allowing scans to suspend at the root level of the objset. For backwards compatibility, we use the bookmark when we suspend here so that older versions of the code will work as intended. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #9300 --- module/zfs/dsl_scan.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index c37e77be44a..f1d995b3d3a 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1275,8 +1275,8 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ - /* We only know how to resume from level-0 blocks. */ - if (zb && zb->zb_level != 0) + /* We only know how to resume from level-0 and objset blocks. 
*/ + if (zb && (zb->zb_level != 0 && zb->zb_level != ZB_ROOT_LEVEL)) return (B_FALSE); /* @@ -1307,7 +1307,16 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || spa_shutting_down(scn->scn_dp->dp_spa) || (zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn))) { - if (zb) { + if (zb && zb->zb_level == ZB_ROOT_LEVEL) { + dprintf("suspending at first available bookmark " + "%llx/%llx/%llx/%llx\n", + (longlong_t)zb->zb_objset, + (longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (longlong_t)zb->zb_blkid); + SET_BOOKMARK(&scn->scn_phys.scn_bookmark, + zb->zb_objset, 0, 0, 0); + } else if (zb != NULL) { dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, From 088f97b9210f646c642d5dd7d517c46beff1c0dd Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 12 Sep 2019 16:32:32 -0400 Subject: [PATCH 221/325] Canonicalize Python shebangs /usr/bin/env python3 is the suggested[1] shebang for Python in general (likewise for python2) and is conventional across platforms. This eases development on systems where python is not installed in /usr/bin (FreeBSD for example) and makes it possible to develop in virtual environments (venv) for isolating dependencies. Many packaging guidelines discourage the use of /usr/bin/env, but since this is the canonical way of writing shebangs in the Python community, many packaging scripts are already equipped to handle substituting the appropriate absolute path to python automatically. Some RPM package builders lacking brp-mangle-shebangs need a small fallback mechanism in the package spec to stamp the appropriate shebang on installed Python scripts. 
[1]: https://docs.python.org/3/using/unix.html?#miscellaneous Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Ryan Moeller Closes #9314 --- cmd/arc_summary/arc_summary2 | 2 +- cmd/arc_summary/arc_summary3 | 2 +- cmd/arcstat/Makefile.am | 2 +- cmd/arcstat/arcstat | 2 +- cmd/dbufstat/Makefile.am | 2 +- cmd/dbufstat/dbufstat | 2 +- rpm/generic/zfs.spec.in | 12 ++++++++---- tests/test-runner/bin/Makefile.am | 2 +- tests/test-runner/bin/test-runner.py | 2 +- tests/test-runner/bin/zts-report.py | 2 +- 10 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cmd/arc_summary/arc_summary2 b/cmd/arc_summary/arc_summary2 index ab4a3c574a5..1326d9e627f 100755 --- a/cmd/arc_summary/arc_summary2 +++ b/cmd/arc_summary/arc_summary2 @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/env python2 # # $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $ # diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 index d3327143849..7bee77061d5 100755 --- a/cmd/arc_summary/arc_summary3 +++ b/cmd/arc_summary/arc_summary3 @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright (c) 2008 Ben Rockwood , # Copyright (c) 2010 Martin Matuska , diff --git a/cmd/arcstat/Makefile.am b/cmd/arcstat/Makefile.am index 2d59faa9c87..8166778a13e 100644 --- a/cmd/arcstat/Makefile.am +++ b/cmd/arcstat/Makefile.am @@ -8,6 +8,6 @@ dist_bin_SCRIPTS = arcstat # if USING_PYTHON_2 install-exec-hook: - sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \ + sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \ $(DESTDIR)$(bindir)/arcstat endif diff --git a/cmd/arcstat/arcstat b/cmd/arcstat/arcstat index 57a2d621f34..003499928f3 100755 --- a/cmd/arcstat/arcstat +++ b/cmd/arcstat/arcstat @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Print out ZFS ARC Statistics exported via kstat(1) # For a definition of fields, or usage, use arctstat.pl -v diff --git 
a/cmd/dbufstat/Makefile.am b/cmd/dbufstat/Makefile.am index 06923d38b2e..a3f0c6e50d7 100644 --- a/cmd/dbufstat/Makefile.am +++ b/cmd/dbufstat/Makefile.am @@ -8,6 +8,6 @@ dist_bin_SCRIPTS = dbufstat # if USING_PYTHON_2 install-exec-hook: - sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \ + sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \ $(DESTDIR)$(bindir)/dbufstat endif diff --git a/cmd/dbufstat/dbufstat b/cmd/dbufstat/dbufstat index e6c947fbcbd..4a57d811301 100755 --- a/cmd/dbufstat/dbufstat +++ b/cmd/dbufstat/dbufstat @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Print out statistics for all cached dmu buffers. This information # is available through the dbufs kstat and may be post-processed as diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 5ef6f7bcfa3..545627d4bfb 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -53,10 +53,6 @@ %bcond_with asan %bcond_with systemd -# Exclude test-runner.py from the rpmbuild shebang check to allow it to run -# under Python 2 and 3. 
-%global __brp_mangle_shebangs_exclude_from test-runner.py - # Generic enable switch for systemd %if %{with systemd} %define _systemd 1 @@ -354,6 +350,14 @@ make %{?_smp_mflags} %{__rm} -rf $RPM_BUILD_ROOT make install DESTDIR=%{?buildroot} find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \; +%if 0%{!?__brp_mangle_shebangs:1} +find %{?buildroot}%{_bindir} \ + \( -name arc_summary -or -name arcstat -or -name dbufstat \) \ + -exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \; +find %{?buildroot}%{_datadir} \ + \( -name test-runner.py -or -name zts-report.py \) \ + -exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \; +%endif %post %if 0%{?_systemd} diff --git a/tests/test-runner/bin/Makefile.am b/tests/test-runner/bin/Makefile.am index e1ae21548e9..2c031f74550 100644 --- a/tests/test-runner/bin/Makefile.am +++ b/tests/test-runner/bin/Makefile.am @@ -9,7 +9,7 @@ dist_pkgdata_SCRIPTS = \ # if USING_PYTHON_2 install-data-hook: - sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \ + sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \ $(DESTDIR)$(pkgdatadir)/test-runner.py \ $(DESTDIR)$(pkgdatadir)/zts-report.py endif diff --git a/tests/test-runner/bin/test-runner.py b/tests/test-runner/bin/test-runner.py index bf2c77c18a9..ca08b375411 100755 --- a/tests/test-runner/bin/test-runner.py +++ b/tests/test-runner/bin/test-runner.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # This file and its contents are supplied under the terms of the diff --git a/tests/test-runner/bin/zts-report.py b/tests/test-runner/bin/zts-report.py index d046c13a55e..c5c869045ed 100755 --- a/tests/test-runner/bin/zts-report.py +++ b/tests/test-runner/bin/zts-report.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # This file and its contents are supplied under the terms of the From 5986c5c68734f6728cafdd3deae8a3df71cd093c Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Mon, 16 Sep 2019 13:07:33 -0400 Subject: [PATCH 222/325] Fix clone 
handling with encryption roots Currently, spa_keystore_change_key_sync_impl() does not recurse into clones when updating encryption roots for either a call to 'zfs promote' or 'zfs change-key'. This can cause children of these clones to end up in a state where they point to the wrong dataset as the encryption root. It can also trigger ASSERTs in some cases where the code checks reference counts on wrapping keys. This patch fixes this issue by ensuring that this function properly recurses into clones during processing. Reviewed-by: Brian Behlendorf Reviewed-by: Alek Pinchuk Signed-off-by: Tom Caputi Closes #9267 Closes #9294 --- module/zfs/dsl_crypt.c | 69 +++++++++++----- tests/runfiles/linux.run | 2 +- .../cli_root/zfs_change-key/Makefile.am | 1 + .../zfs_change-key/zfs_change-key_clones.ksh | 80 +++++++++++++++++++ .../zfs_promote_encryptionroot.ksh | 21 ++++- 5 files changed, 149 insertions(+), 24 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 271019e7902..1545af53af7 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1418,10 +1418,17 @@ spa_keystore_change_key_check(void *arg, dmu_tx_t *tx) return (ret); } - +/* + * This function deals with the intricacies of updating wrapping + * key references and encryption roots recursively in the event + * of a call to 'zfs change-key' or 'zfs promote'. The 'skip' + * parameter should always be set to B_FALSE when called + * externally. 
+ */ static void spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, - uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) + uint64_t new_rddobj, dsl_wrapping_key_t *wkey, boolean_t skip, + dmu_tx_t *tx) { zap_cursor_t *zc; zap_attribute_t *za; @@ -1435,7 +1442,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, /* hold the dd */ VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); - /* ignore hidden dsl dirs */ + /* ignore special dsl dirs */ if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') { dsl_dir_rele(dd, FTAG); return; @@ -1446,7 +1453,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, * or if this dd is a clone. */ VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj)); - if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) { + if (!skip && (curr_rddobj != rddobj || dsl_dir_is_clone(dd))) { dsl_dir_rele(dd, FTAG); return; } @@ -1454,19 +1461,23 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, /* * If we don't have a wrapping key just update the dck to reflect the * new encryption root. Otherwise rewrap the entire dck and re-sync it - * to disk. + * to disk. If skip is set, we don't do any of this work. 
*/ - if (wkey == NULL) { - VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj, - DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx)); - } else { - VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, - FTAG, &dck)); - dsl_wrapping_key_hold(wkey, dck); - dsl_wrapping_key_rele(dck->dck_wkey, dck); - dck->dck_wkey = wkey; - dsl_crypto_key_sync(dck, tx); - spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + if (!skip) { + if (wkey == NULL) { + VERIFY0(zap_update(dp->dp_meta_objset, + dd->dd_crypto_obj, + DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, + &new_rddobj, tx)); + } else { + VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, + FTAG, &dck)); + dsl_wrapping_key_hold(wkey, dck); + dsl_wrapping_key_rele(dck->dck_wkey, dck); + dck->dck_wkey = wkey; + dsl_crypto_key_sync(dck, tx); + spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + } } zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); @@ -1478,7 +1489,27 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { spa_keystore_change_key_sync_impl(rddobj, - za->za_first_integer, new_rddobj, wkey, tx); + za->za_first_integer, new_rddobj, wkey, B_FALSE, tx); + } + zap_cursor_fini(zc); + + /* + * Recurse into all dsl dirs of clones. We utilize the skip parameter + * here so that we don't attempt to process the clones directly. This + * is because the clone and its origin share the same dck, which has + * already been updated. 
+ */ + for (zap_cursor_init(zc, dp->dp_meta_objset, + dsl_dir_phys(dd)->dd_clones); + zap_cursor_retrieve(zc, za) == 0; + zap_cursor_advance(zc)) { + dsl_dataset_t *clone; + + VERIFY0(dsl_dataset_hold_obj(dp, za->za_first_integer, + FTAG, &clone)); + spa_keystore_change_key_sync_impl(rddobj, + clone->ds_dir->dd_object, new_rddobj, wkey, B_TRUE, tx); + dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(zc); @@ -1558,7 +1589,7 @@ spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx) /* recurse through all children and rewrap their keys */ spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object, - new_rddobj, wkey, tx); + new_rddobj, wkey, B_FALSE, tx); /* * All references to the old wkey should be released now (if it @@ -1736,7 +1767,7 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER); spa_keystore_change_key_sync_impl(rddobj, origin->dd_object, - target->dd_object, NULL, tx); + target->dd_object, NULL, B_FALSE, tx); rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock); dsl_dataset_rele(targetds, FTAG); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index a5fe26dfdda..7977724b823 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -122,7 +122,7 @@ tags = ['functional', 'cli_root', 'zfs_bookmark'] [tests/functional/cli_root/zfs_change-key] tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', - 'zfs_change-key_pbkdf2iters'] + 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones'] tags = ['functional', 'cli_root', 'zfs_change-key'] [tests/functional/cli_root/zfs_clone] diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am index 7c67e7239b8..72d6e4700e1 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am +++ 
b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am @@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ zfs_change-key.ksh \ zfs_change-key_child.ksh \ + zfs_change-key_clones.ksh \ zfs_change-key_inherit.ksh \ zfs_change-key_format.ksh \ zfs_change-key_load.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh new file mode 100755 index 00000000000..497fb99c810 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh @@ -0,0 +1,80 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should correctly update encryption roots with clones. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create an encryption root child of the first dataset +# 3. Clone the child encryption root twice +# 4. Add inheriting children to the encryption root and each of the clones +# 5. Verify the encryption roots +# 6. Have the child encryption root inherit from its parent +# 7. 
Verify the encryption root for all datasets is now the parent dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -Rf $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs change-key' should correctly update encryption " \ + "roots with clones" + +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "echo $PASSPHRASE2 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1/child" +log_must zfs snapshot $TESTPOOL/$TESTFS1/child@1 +log_must zfs clone $TESTPOOL/$TESTFS1/child@1 $TESTPOOL/$TESTFS1/clone1 +log_must zfs clone $TESTPOOL/$TESTFS1/child@1 $TESTPOOL/$TESTFS1/clone2 +log_must zfs create $TESTPOOL/$TESTFS1/child/A +log_must zfs create $TESTPOOL/$TESTFS1/clone1/B +log_must zfs create $TESTPOOL/$TESTFS1/clone2/C + +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1 $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2 $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child/A $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1/B $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2/C $TESTPOOL/$TESTFS1/child + +log_must zfs change-key -i $TESTPOOL/$TESTFS1/child + +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child/A $TESTPOOL/$TESTFS1 +log_must 
verify_encryption_root $TESTPOOL/$TESTFS1/clone1/B $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2/C $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key' correctly updates encryption roots with clones" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh index 336c7b2538b..2c7584d3541 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh @@ -29,11 +29,12 @@ # 1. Create an encrypted dataset # 2. Clone the encryption root # 3. Clone the clone -# 4. Verify the encryption root of all three datasets is the origin +# 4. Add children to each of these three datasets +# 4. Verify the encryption root of all datasets is the origin # 5. Promote the clone of the clone -# 6. Verify the encryption root of all three datasets is still the origin -# 7. Promote the clone of the original encryption root -# 8. Verify the encryption root of all three datasets is the promoted dataset +# 6. Verify the encryption root of all datasets is still the origin +# 7. Promote the dataset again, so it is now the encryption root +# 8. 
Verify the encryption root of all datasets is the promoted dataset # verify_runnable "both" @@ -62,19 +63,31 @@ log_must zfs snap $snaproot log_must zfs clone $snaproot $TESTPOOL/clone1 log_must zfs snap $snapclone log_must zfs clone $snapclone $TESTPOOL/clone2 +log_must zfs create $TESTPOOL/$TESTFS1/child0 +log_must zfs create $TESTPOOL/clone1/child1 +log_must zfs create $TESTPOOL/clone2/child2 log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1 log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child0 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone1/child1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone2/child2 $TESTPOOL/$TESTFS1 log_must zfs promote $TESTPOOL/clone2 log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1 log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child0 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone1/child1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone2/child2 $TESTPOOL/$TESTFS1 log_must zfs promote $TESTPOOL/clone2 log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/clone2 log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/clone2 log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child0 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/clone1/child1 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/clone2/child2 $TESTPOOL/clone2 log_pass "ZFS promotes clones of an encryption root" From b8bd3ec2af195f3a7be0f093ca8839be78d58755 Mon Sep 17 00:00:00 2001 From: loli10K Date: Mon, 16 Sep 2019 19:44:51 +0200 Subject: [PATCH 223/325] ZTS: Fix /usr/bin/env: 
'python2': No such file or directory Since 4f342e45 env(1) must be able to find a "python2" executable in the "constrained path" on systems configured with --with-python=2.x otherwise the ZFS Test Suite won't be able to use Python scripts. Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: loli10K Closes #9325 --- tests/zfs-tests/include/commands.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 127a1477d42..0d75de9a233 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -95,6 +95,7 @@ export SYSTEM_FILES='arp ps pwd python + python2 python3 quotaon readlink From 444df1051c82c8c6285e46148e524477df12d5c9 Mon Sep 17 00:00:00 2001 From: loli10K Date: Mon, 16 Sep 2019 19:46:59 +0200 Subject: [PATCH 224/325] Device removal of indirect vdev panics the kernel This commit fixes a NULL pointer dereference triggered in spa_vdev_remove_top_check() by trying to "zpool remove" an indirect vdev. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #9327 --- module/zfs/vdev_removal.c | 4 ++ tests/runfiles/linux.run | 3 +- .../tests/functional/removal/Makefile.am | 2 +- .../functional/removal/remove_indirect.ksh | 58 +++++++++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/removal/remove_indirect.ksh diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 6f64edd8c47..5dba2fb6989 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. 
*/ #include @@ -1936,6 +1937,9 @@ spa_vdev_remove_top_check(vdev_t *vd) if (vd != vd->vdev_top) return (SET_ERROR(ENOTSUP)); + if (!vdev_is_concrete(vd)) + return (SET_ERROR(ENOTSUP)); + if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REMOVAL)) return (SET_ERROR(ENOTSUP)); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 7977724b823..04ec2936bb3 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -772,7 +772,8 @@ tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space', 'removal_with_remove', 'removal_with_scrub', 'removal_with_send', 'removal_with_send_recv', 'removal_with_snapshot', 'removal_with_write', 'removal_with_zdb', 'remove_expanded', - 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz'] + 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz', + 'remove_indirect'] tags = ['functional', 'removal'] [tests/functional/rename_dirs] diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am index 2bd015e8a6e..1551a92e52c 100644 --- a/tests/zfs-tests/tests/functional/removal/Makefile.am +++ b/tests/zfs-tests/tests/functional/removal/Makefile.am @@ -29,7 +29,7 @@ dist_pkgdata_SCRIPTS = \ removal_with_send.ksh removal_with_send_recv.ksh \ removal_with_snapshot.ksh removal_with_write.ksh \ removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \ - remove_raidz.ksh remove_expanded.ksh + remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh dist_pkgdata_DATA = \ removal.kshlib diff --git a/tests/zfs-tests/tests/functional/removal/remove_indirect.ksh b/tests/zfs-tests/tests/functional/removal/remove_indirect.ksh new file mode 100755 index 00000000000..c4ba0d9ac56 --- /dev/null +++ b/tests/zfs-tests/tests/functional/removal/remove_indirect.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 
1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright 2019, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib + +# +# DESCRIPTION: +# Device removal cannot remove non-concrete vdevs +# +# STRATEGY: +# 1. Create a pool with removable devices +# 2. Remove a top-level device +# 3. Verify we can't remove the "indirect" vdev created by the first removal +# + +verify_runnable "global" + +function cleanup +{ + destroy_pool $TESTPOOL + log_must rm -f $TEST_BASE_DIR/device-{1,2,3} +} + +log_assert "Device removal should not be able to remove non-concrete vdevs" +log_onexit cleanup + +# 1. Create a pool with removable devices +truncate -s $MINVDEVSIZE $TEST_BASE_DIR/device-{1,2,3} +zpool create $TESTPOOL $TEST_BASE_DIR/device-{1,2,3} + +# 2. Remove a top-level device +log_must zpool remove $TESTPOOL $TEST_BASE_DIR/device-1 +log_must wait_for_removal $TESTPOOL + +# 3. Verify we can't remove the "indirect" vdev created by the first removal +INDIRECT_VDEV=$(zpool list -v -g $TESTPOOL | awk '{if ($2 == "-") { print $1; exit} }') +log_must test -n "$INDIRECT_VDEV" +log_mustnot zpool remove $TESTPOOL $INDIRECT_VDEV + +log_pass "Device removal cannot remove non-concrete vdevs" From 3ec97ba6f181cf7dd2b000139790d8abf9758d1b Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Wed, 18 Sep 2019 12:05:57 -0400 Subject: [PATCH 225/325] Refactor libzfs_error_init newlines Move the trailing newlines from the error message strings to the format strings to more closely match the other error messages. 
Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Ryan Moeller Closes #9330 --- cmd/mount_zfs/mount_zfs.c | 2 +- cmd/zfs/zfs_main.c | 2 +- cmd/zinject/zinject.c | 2 +- cmd/zpool/zpool_main.c | 2 +- lib/libzfs/libzfs_util.c | 10 +++++----- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c index a9b1e166b4b..a37dd6f53a1 100644 --- a/cmd/mount_zfs/mount_zfs.c +++ b/cmd/mount_zfs/mount_zfs.c @@ -489,7 +489,7 @@ main(int argc, char **argv) zfsutil = 1; if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (MOUNT_SYSERR); } diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 3dd2b388690..cd8c0aca076 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -8172,7 +8172,7 @@ main(int argc, char **argv) return (zfs_do_version(argc, argv)); if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index cff7f861a2e..1795bfd4506 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -763,7 +763,7 @@ main(int argc, char **argv) uint32_t dvas = 0; if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index b9c7462b618..2c5c88e694b 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -9282,7 +9282,7 @@ main(int argc, char **argv) return (zpool_do_version(argc, argv)); if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c 
index eed6282ca35..4a9676668fb 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -69,21 +69,21 @@ libzfs_error_init(int error) case ENXIO: return (dgettext(TEXT_DOMAIN, "The ZFS modules are not " "loaded.\nTry running '/sbin/modprobe zfs' as root " - "to load them.\n")); + "to load them.")); case ENOENT: return (dgettext(TEXT_DOMAIN, "/dev/zfs and /proc/self/mounts " "are required.\nTry running 'udevadm trigger' and 'mount " - "-t proc proc /proc' as root.\n")); + "-t proc proc /proc' as root.")); case ENOEXEC: return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be " "auto-loaded.\nTry running '/sbin/modprobe zfs' as " - "root to manually load them.\n")); + "root to manually load them.")); case EACCES: return (dgettext(TEXT_DOMAIN, "Permission denied the " - "ZFS utilities must be run as root.\n")); + "ZFS utilities must be run as root.")); default: return (dgettext(TEXT_DOMAIN, "Failed to initialize the " - "libzfs library.\n")); + "libzfs library.")); } } From 8498a2f3f8340718c929e8ef047bbb7b187afa29 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Sun, 22 Sep 2019 18:27:53 -0400 Subject: [PATCH 226/325] Use signed types to prevent subtraction overflow The difference between the sizes could be positive or negative. Leaving the types as unsigned means the result overflows when the difference is negative and removing the labs() means we'll have introduced a bug. The subtraction results in the correct value when the unsigned integer is interpreted as a signed integer by labs(). Clang doesn't see that we're doing a subtraction and abusing the types. It sees the result of the subtraction, an unsigned value, being passed to an absolute value function and emits a warning which we treat as an error. 
Reviewed by: Youzhong Yang Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9355 --- cmd/zpool/zpool_vdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index ef2a30996e5..527fca08b88 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -829,7 +829,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) rep.zprl_children = 1; rep.zprl_parity = 0; } else { - uint64_t vdev_size; + int64_t vdev_size; /* * This is a mirror or RAID-Z vdev. Go through and make @@ -859,12 +859,12 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) */ type = NULL; dontreport = 0; - vdev_size = -1ULL; + vdev_size = -1LL; for (c = 0; c < children; c++) { nvlist_t *cnv = child[c]; char *path; struct stat64 statbuf; - uint64_t size = -1ULL; + int64_t size = -1LL; char *childtype; int fd, err; @@ -955,7 +955,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) * (~16MB) then report an error. */ if (!dontreport && - (vdev_size != -1ULL && + (vdev_size != -1LL && (labs(size - vdev_size) > ZPOOL_FUZZ))) { if (ret != NULL) From ec5d76e853a4b040fd76d803d51a40f872065c09 Mon Sep 17 00:00:00 2001 From: loli10K Date: Tue, 24 Sep 2019 21:01:37 +0200 Subject: [PATCH 227/325] diff_cb() does not handle large dnodes Trying to 'zfs diff' a snapshot with large dnodes will incorrectly try to access its interior slots when dnodesize > sizeof(dnode_phys_t). This is normally not an issue because the interior slots are zero-filled, which report_dnode() handles calling report_free_dnode_range(). However this is not the case for encrypted large dnodes or filesystem using many SA based xattrs where the extra data past the legacy dnode size boundary is interpreted as a dnode_phys_t. 
Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Reviewed-by: Ryan Moeller Signed-off-by: loli10K Closes #7678 Closes #8931 Closes #9343 --- module/zfs/dmu_diff.c | 5 +++-- .../cli_root/zfs_diff/zfs_diff_encrypted.ksh | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index 76c32b12642..6a7cd844c44 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. */ #include @@ -130,7 +131,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, dnode_phys_t *blk; arc_buf_t *abuf; arc_flags_t aflags = ARC_FLAG_WAIT; - int blksz = BP_GET_LSIZE(bp); + int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; int zio_flags = ZIO_FLAG_CANFAIL; int i; @@ -142,7 +143,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (SET_ERROR(EIO)); blk = abuf->b_data; - for (i = 0; i < blksz >> DNODE_SHIFT; i++) { + for (i = 0; i < epb; i += blk[i].dn_extra_slots + 1) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = report_dnode(da, dnobj, blk+i); diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh index 471e9ca68e7..96e6d9b5ae8 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh @@ -24,14 +24,15 @@ # 1. Create an encrypted dataset # 2. Create two snapshots of the dataset # 3. Perform 'zfs diff -Ft' and verify no errors occur +# 4. 
Perform the same test on a dataset with large dnodes # verify_runnable "both" function cleanup { - datasetexists $TESTPOOL/$TESTFS1 && \ - log_must zfs destroy -r $TESTPOOL/$TESTFS1 + destroy_dataset "$TESTPOOL/$TESTFS1" "-r" + destroy_dataset "$TESTPOOL/$TESTFS2" "-r" } log_assert "'zfs diff' should work with encrypted datasets" @@ -50,4 +51,13 @@ log_must zfs snapshot $TESTPOOL/$TESTFS1@snap2 # 3. Perform 'zfs diff' and verify no errors occur log_must zfs diff -Ft $TESTPOOL/$TESTFS1@snap1 $TESTPOOL/$TESTFS1@snap2 +# 4. Perform the same test on a dataset with large dnodes +log_must eval "echo 'password' | zfs create -o dnodesize=4k \ + -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS2" +MNTPOINT="$(get_prop mountpoint $TESTPOOL/$TESTFS2)" +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap1 +log_must touch "$MNTPOINT/file" +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap2 +log_must zfs diff -Ft $TESTPOOL/$TESTFS2@snap1 $TESTPOOL/$TESTFS2@snap2 + log_pass "'zfs diff' works with encrypted datasets" From 055238d2eb1a4b8ff6032f31aa871a259eddb59d Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 25 Sep 2019 09:23:29 -0700 Subject: [PATCH 228/325] Add warning for zfs_vdev_elevator option removal Originally the zfs_vdev_elevator module option was added as a convenience so the requested elevator would be automatically set on the underlying block devices. At the time this was simple because the kernel provided an API function which did exactly this. This API was then removed in the Linux 4.12 kernel which prompted us to add compatibly code to set the elevator via a usermodehelper. While well intentioned this introduced a bug which could cause a system hang, that issue was subsequently fixed by commit 2a0d4188. In order to avoid future bugs in this area, and to simplify the code, this functionality is being deprecated. A console warning has been added to notify any existing consumers and the documentation updated accordingly. 
This option will remain for the lifetime of the 0.8.x series for compatibility but if planned to be phased out of master. Reviewed-by: Richard Laager Reviewed-by: loli10K Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Issue #8664 Closes #9317 --- man/man5/zfs-module-parameters.5 | 6 ++++-- module/zfs/vdev_disk.c | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 83d34025ab8..536eb1466bd 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2804,8 +2804,10 @@ Default value: \fB32,768\fR. \fBzfs_vdev_scheduler\fR (charp) .ad .RS 12n -Set the Linux I/O scheduler on whole disk vdevs to this scheduler. Valid options -are noop, cfq, bfq & deadline +Set the Linux I/O scheduler on whole disk vdevs to this scheduler. This +option has been deprecated and will be removed in a future release. The +standard \fB/sys/block//queue/scheduler\fR interface should be used +to set a block device scheduler. .sp Default value: \fBnoop\fR. .RE diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 46437f21fb7..823a54c12e9 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -931,7 +931,14 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) mutex_exit(&spa_namespace_lock); } - return (param_set_charp(val, kp)); + + int error = param_set_charp(val, kp); + if (error == 0) { + printk(KERN_INFO "The 'zfs_vdev_scheduler' module option " + "will be removed in a future release.\n"); + } + + return (error); } vdev_ops_t vdev_disk_ops = { From 988b04047616065f8c3be08912c6f3efb4e57ee1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 25 Sep 2019 09:24:45 -0700 Subject: [PATCH 229/325] ZTS: harden xattr/cleanup.ksh When the xattr/cleanup.ksh script is unable to remove the test group due to an active process then it will not call default_cleanup. 
This will result in a zvol_ENOSPC/setup failure when attempting to create the /mnt/testdir directory which will already exist. Resolve the issue by performing the default_cleanup before removing the test user and group to ensure this step always happens. Also allow one more retry to further minimize the likelihood of the cleanup failing. Reviewed-by: Ryan Moeller Signed-off-by: Brian Behlendorf Closes #9358 --- tests/zfs-tests/include/libtest.shlib | 2 +- tests/zfs-tests/tests/functional/xattr/cleanup.ksh | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index bc76f09a03c..b439b44f586 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -2355,7 +2355,7 @@ function del_user # fi if id $user > /dev/null 2>&1; then - log_must_retry "currently used" 5 userdel $user + log_must_retry "currently used" 6 userdel $user fi [[ -d $basedir/$user ]] && rm -fr $basedir/$user diff --git a/tests/zfs-tests/tests/functional/xattr/cleanup.ksh b/tests/zfs-tests/tests/functional/xattr/cleanup.ksh index 5090906199c..b3629629c78 100755 --- a/tests/zfs-tests/tests/functional/xattr/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/xattr/cleanup.ksh @@ -30,9 +30,6 @@ . $STF_SUITE/include/libtest.shlib . 
$STF_SUITE/tests/functional/xattr/xattr_common.kshlib -del_user $ZFS_USER -del_group $ZFS_GROUP - USES_NIS=$(cat $TEST_BASE_DIR/zfs-xattr-test-nis.txt) rm $TEST_BASE_DIR/zfs-xattr-test-nis.txt @@ -41,4 +38,9 @@ then svcadm enable svc:/network/nis/client:default fi -default_cleanup +default_cleanup_noexit + +del_user $ZFS_USER +del_group $ZFS_GROUP + +log_pass From 62c034f6d45df04fc81d6c7ca5bd884e17bfee19 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Jul 2019 09:31:20 -0700 Subject: [PATCH 230/325] Linux 5.0 compat: SIMD compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS, and 5.0 and newer kernels. This commit squashes the following commits from master in to a single commit which can be applied to 0.8.2. 10fa2545 - Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore b88ca2ac - Enable SIMD for encryption 095b5412 - Fix CONFIG_X86_DEBUG_FPU build failure e5db3134 - Linux 5.0 compat: SIMD compatibility Reviewed-by: Fabian Grünbichler Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf TEST_ZIMPORT_SKIP="yes" --- cmd/ztest/ztest.c | 3 + config/kernel-fpu.m4 | 77 ++++- include/linux/Makefile.am | 1 + include/linux/simd.h | 42 +++ include/linux/simd_aarch64.h | 23 +- include/linux/simd_x86.h | 301 ++++++++++++++++---- include/sys/vdev_raidz.h | 2 +- include/sys/vdev_raidz_impl.h | 2 +- module/icp/algs/aes/aes_impl.c | 31 +- module/icp/algs/aes/aes_impl_aesni.c | 2 +- module/icp/algs/modes/gcm.c | 38 ++- module/icp/algs/modes/gcm_pclmulqdq.c | 2 +- module/icp/include/aes/aes_impl.h | 4 +- module/icp/include/modes/gcm_impl.h | 4 +- module/icp/io/aes.c | 2 +- module/zcommon/zfs_fletcher.c | 76 +++-- module/zcommon/zfs_fletcher_aarch64_neon.c | 2 +- module/zcommon/zfs_fletcher_avx512.c | 2 +- module/zcommon/zfs_fletcher_intel.c | 2 +- module/zcommon/zfs_fletcher_sse.c | 5 +- module/zcommon/zfs_prop.c | 14 + module/zfs/vdev_raidz_math.c | 93 +++--- 
module/zfs/vdev_raidz_math_aarch64_neon.c | 2 +- module/zfs/vdev_raidz_math_aarch64_neonx2.c | 2 +- module/zfs/vdev_raidz_math_avx2.c | 2 +- module/zfs/vdev_raidz_math_avx512bw.c | 5 +- module/zfs/vdev_raidz_math_avx512f.c | 5 +- module/zfs/vdev_raidz_math_sse2.c | 2 +- module/zfs/vdev_raidz_math_ssse3.c | 4 +- module/zfs/zio_crypt.c | 2 +- 30 files changed, 547 insertions(+), 205 deletions(-) create mode 100644 include/linux/simd.h diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index e83654a3262..8fe412672ff 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -7110,6 +7111,8 @@ ztest_run(ztest_shared_t *zs) metaslab_preload_limit = ztest_random(20) + 1; ztest_spa = spa; + VERIFY0(vdev_raidz_impl_set("cycle")); + dmu_objset_stats_t dds; VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool, DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os)); diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index ebb02fb09a2..49316aab459 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -2,8 +2,9 @@ dnl # dnl # Handle differences in kernel FPU code. dnl # dnl # Kernel -dnl # 5.0: All kernel fpu functions are GPL only, so we can't use them. -dnl # (nothing defined) +dnl # 5.0: Wrappers have been introduced to save/restore the FPU state. +dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels. +dnl # HAVE_KERNEL_FPU_INTERNAL dnl # dnl # 4.2: Use __kernel_fpu_{begin,end}() dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU @@ -12,7 +13,11 @@ dnl # Pre-4.2: Use kernel_fpu_{begin,end}() dnl # HAVE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU dnl # AC_DEFUN([ZFS_AC_KERNEL_FPU], [ - AC_MSG_CHECKING([which kernel_fpu header to use]) + dnl # + dnl # N.B. The header check is performed before all other checks since + dnl # it depends on HAVE_KERNEL_FPU_API_HEADER being set in confdefs.h. 
+ dnl # + AC_MSG_CHECKING([whether fpu headers are available]) ZFS_LINUX_TRY_COMPILE([ #include #include @@ -25,9 +30,13 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ AC_MSG_RESULT(i387.h & xcr.h) ]) - AC_MSG_CHECKING([which kernel_fpu function to use]) + dnl # + dnl # Legacy kernel + dnl # + AC_MSG_CHECKING([whether kernel fpu is available]) ZFS_LINUX_TRY_COMPILE_SYMBOL([ #include + #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include #else @@ -45,8 +54,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) ],[ + dnl # + dnl # Linux 4.2 kernel + dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL([ #include + #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include #else @@ -57,12 +70,60 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ ],[ __kernel_fpu_begin(); __kernel_fpu_end(); - ], [__kernel_fpu_begin], [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [ + ], [__kernel_fpu_begin], + [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [ AC_MSG_RESULT(__kernel_fpu_*) - AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1, [kernel has __kernel_fpu_* functions]) - AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) + AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1, + [kernel has __kernel_fpu_* functions]) + AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, + [kernel exports FPU functions]) ],[ - AC_MSG_RESULT(not exported) + ZFS_LINUX_TRY_COMPILE([ + #include + + #if defined(__x86_64) || defined(__x86_64__) || \ + defined(__i386) || defined(__i386__) + #if !defined(__x86) + #define __x86 + #endif + #endif + + #if !defined(__x86) + #error Unsupported architecture + #endif + + #include + #ifdef HAVE_KERNEL_FPU_API_HEADER + #include + #include + #else + #include + #include + #endif + + #if !defined(XSTATE_XSAVE) + #error XSTATE_XSAVE not defined + #endif + + #if !defined(XSTATE_XRESTORE) + #error XSTATE_XRESTORE not defined + #endif + ],[ + struct fpu *fpu = ¤t->thread.fpu; + union fpregs_state *st = &fpu->state; + struct fregs_state *fr __attribute__ ((unused)) = + 
&st->fsave; + struct fxregs_state *fxr __attribute__ ((unused)) = + &st->fxsave; + struct xregs_state *xr __attribute__ ((unused)) = + &st->xsave; + ], [ + AC_MSG_RESULT(internal) + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, + [kernel fpu internal]) + ],[ + AC_MSG_RESULT(unavailable) + ]) ]) ]) ]) diff --git a/include/linux/Makefile.am b/include/linux/Makefile.am index efb49520e63..2455759e813 100644 --- a/include/linux/Makefile.am +++ b/include/linux/Makefile.am @@ -7,6 +7,7 @@ KERNEL_H = \ $(top_srcdir)/include/linux/blkdev_compat.h \ $(top_srcdir)/include/linux/utsname_compat.h \ $(top_srcdir)/include/linux/kmap_compat.h \ + $(top_srcdir)/include/linux/simd.h \ $(top_srcdir)/include/linux/simd_x86.h \ $(top_srcdir)/include/linux/simd_aarch64.h \ $(top_srcdir)/include/linux/mod_compat.h \ diff --git a/include/linux/simd.h b/include/linux/simd.h new file mode 100644 index 00000000000..bb5f0f02a9c --- /dev/null +++ b/include/linux/simd.h @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (C) 2019 Lawrence Livermore National Security, LLC. 
+ */ + +#ifndef _SIMD_H +#define _SIMD_H + +#if defined(__x86) +#include + +#elif defined(__aarch64__) +#include +#else + +#define kfpu_allowed() 0 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) + +#endif +#endif /* _SIMD_H */ diff --git a/include/linux/simd_aarch64.h b/include/linux/simd_aarch64.h index 56153a16072..7ba308d1543 100644 --- a/include/linux/simd_aarch64.h +++ b/include/linux/simd_aarch64.h @@ -27,9 +27,10 @@ * * Kernel fpu methods: * kfpu_allowed() - * kfpu_initialize() * kfpu_begin() * kfpu_end() + * kfpu_init() + * kfpu_fini() */ #ifndef _SIMD_AARCH64_H @@ -43,20 +44,20 @@ #if defined(_KERNEL) #include -#define kfpu_begin() \ -{ \ - kernel_neon_begin(); \ -} -#define kfpu_end() \ -{ \ - kernel_neon_end(); \ -} +#define kfpu_allowed() 1 +#define kfpu_begin() kernel_neon_begin() +#define kfpu_end() kernel_neon_end() +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) #else /* * fpu dummy methods for userspace */ -#define kfpu_begin() do {} while (0) -#define kfpu_end() do {} while (0) +#define kfpu_allowed() 1 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) #endif /* defined(_KERNEL) */ #endif /* __aarch64__ */ diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h index 0489bfaa3a7..69dbd557906 100644 --- a/include/linux/simd_x86.h +++ b/include/linux/simd_x86.h @@ -27,9 +27,10 @@ * * Kernel fpu methods: * kfpu_allowed() - * kfpu_initialize() * kfpu_begin() * kfpu_end() + * kfpu_init() + * kfpu_fini() * * SIMD support: * @@ -84,6 +85,15 @@ #if defined(_KERNEL) +/* + * Disable the WARN_ON_FPU() macro to prevent additional dependencies + * when providing the kfpu_* functions. Relevant warnings are included + * as appropriate and are unconditionally enabled. 
+ */ +#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU) +#undef CONFIG_X86_DEBUG_FPU +#endif + #if defined(HAVE_KERNEL_FPU_API_HEADER) #include #include @@ -92,33 +102,231 @@ #include #endif +/* + * The following cases are for kernels which export either the + * kernel_fpu_* or __kernel_fpu_* functions. + */ +#if defined(KERNEL_EXPORTS_X86_FPU) + +#define kfpu_allowed() 1 +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) + #if defined(HAVE_UNDERSCORE_KERNEL_FPU) #define kfpu_begin() \ -{ \ - preempt_disable(); \ +{ \ + preempt_disable(); \ __kernel_fpu_begin(); \ } -#define kfpu_end() \ -{ \ - __kernel_fpu_end(); \ - preempt_enable(); \ +#define kfpu_end() \ +{ \ + __kernel_fpu_end(); \ + preempt_enable(); \ } + #elif defined(HAVE_KERNEL_FPU) -#define kfpu_begin() kernel_fpu_begin() +#define kfpu_begin() kernel_fpu_begin() #define kfpu_end() kernel_fpu_end() + #else -/* Kernel doesn't export any kernel_fpu_* functions */ -#include /* For kernel xgetbv() */ -#define kfpu_begin() panic("This code should never run") -#define kfpu_end() panic("This code should never run") -#endif /* defined(HAVE_KERNEL_FPU) */ +/* + * This case is unreachable. When KERNEL_EXPORTS_X86_FPU is defined then + * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined. + */ +#error "Unreachable kernel configuration" +#endif + +#else /* defined(KERNEL_EXPORTS_X86_FPU) */ + +/* + * When the kernel_fpu_* symbols are unavailable then provide our own + * versions which allow the FPU to be safely used. + */ +#if defined(HAVE_KERNEL_FPU_INTERNAL) + +extern union fpregs_state **zfs_kfpu_fpregs; + +/* + * Initialize per-cpu variables to store FPU state. 
+ */ +static inline void +kfpu_fini(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (zfs_kfpu_fpregs[cpu] != NULL) { + kfree(zfs_kfpu_fpregs[cpu]); + } + } + + kfree(zfs_kfpu_fpregs); +} + +static inline int +kfpu_init(void) +{ + int cpu; + + zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * + sizeof (union fpregs_state *), GFP_KERNEL); + if (zfs_kfpu_fpregs == NULL) + return (-ENOMEM); + + for_each_possible_cpu(cpu) { + zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (union fpregs_state), + GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); + if (zfs_kfpu_fpregs[cpu] == NULL) { + kfpu_fini(); + return (-ENOMEM); + } + } + + return (0); +} + +#define kfpu_allowed() 1 +#define ex_handler_fprestore ex_handler_default + +/* + * FPU save and restore instructions. + */ +#define __asm __asm__ __volatile__ +#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr))) +#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr))) +#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr))) +#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) +#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr))) +#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr))) +#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \ + : : [addr] "m" (rval)); + +static inline void +kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + int err; + + low = mask; + hi = mask >> 32; + XSTATE_XSAVE(addr, low, hi, err); + WARN_ON_ONCE(err); +} + +static inline void +kfpu_save_fxsr(struct fxregs_state *addr) +{ + if (IS_ENABLED(CONFIG_X86_32)) + kfpu_fxsave(addr); + else + kfpu_fxsaveq(addr); +} + +static inline void +kfpu_save_fsave(struct fregs_state *addr) +{ + kfpu_fnsave(addr); +} + +static inline void +kfpu_begin(void) +{ + /* + * Preemption and interrupts must be disabled for the critical + * region where the FPU state is being modified. 
+ */ + preempt_disable(); + local_irq_disable(); + + /* + * The current FPU registers need to be preserved by kfpu_begin() + * and restored by kfpu_end(). They are stored in a dedicated + * per-cpu variable, not in the task struct, this allows any user + * FPU state to be correctly preserved and restored. + */ + union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_save_xsave(&state->xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_save_fxsr(&state->fxsave); + } else { + kfpu_save_fsave(&state->fsave); + } +} + +static inline void +kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + XSTATE_XRESTORE(addr, low, hi); +} + +static inline void +kfpu_restore_fxsr(struct fxregs_state *addr) +{ + /* + * On AuthenticAMD K7 and K8 processors the fxrstor instruction only + * restores the _x87 FOP, FIP, and FDP registers when an exception + * is pending. Clean the _x87 state to force the restore. + */ + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) + kfpu_fxsr_clean(addr); + + if (IS_ENABLED(CONFIG_X86_32)) { + kfpu_fxrstor(addr); + } else { + kfpu_fxrstorq(addr); + } +} + +static inline void +kfpu_restore_fsave(struct fregs_state *addr) +{ + kfpu_frstor(addr); +} + +static inline void +kfpu_end(void) +{ + union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_restore_xsave(&state->xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_restore_fxsr(&state->fxsave); + } else { + kfpu_restore_fsave(&state->fsave); + } + + local_irq_enable(); + preempt_enable(); +} #else + +/* + * FPU support is unavailable. 
+ */ +#define kfpu_allowed() 0 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) + +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ +#endif /* defined(KERNEL_EXPORTS_X86_FPU) */ + +#else /* defined(_KERNEL) */ /* - * fpu dummy methods for userspace + * FPU dummy methods for user space. */ -#define kfpu_begin() do {} while (0) -#define kfpu_end() do {} while (0) +#define kfpu_allowed() 1 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) #endif /* defined(_KERNEL) */ /* @@ -289,7 +497,6 @@ CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); #endif /* !defined(_KERNEL) */ - /* * Detect register set support */ @@ -300,7 +507,7 @@ __simd_state_enabled(const uint64_t state) uint64_t xcr0; #if defined(_KERNEL) -#if defined(X86_FEATURE_OSXSAVE) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_OSXSAVE) has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE); #else has_osxsave = B_FALSE; @@ -330,11 +537,7 @@ static inline boolean_t zfs_sse_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse()); #endif @@ -347,11 +550,7 @@ static inline boolean_t zfs_sse2_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM2)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse2()); #endif @@ -364,11 +563,7 @@ static inline boolean_t zfs_sse3_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM3)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse3()); #endif @@ -381,11 +576,7 @@ static inline boolean_t zfs_ssse3_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_SSSE3)); -#else - return (B_FALSE); -#endif 
#elif !defined(_KERNEL) return (__cpuid_has_ssse3()); #endif @@ -398,11 +589,7 @@ static inline boolean_t zfs_sse4_1_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM4_1)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse4_1()); #endif @@ -415,11 +602,7 @@ static inline boolean_t zfs_sse4_2_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM4_2)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse4_2()); #endif @@ -433,11 +616,7 @@ zfs_avx_available(void) { boolean_t has_avx; #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) has_avx = !!boot_cpu_has(X86_FEATURE_AVX); -#else - has_avx = B_FALSE; -#endif #elif !defined(_KERNEL) has_avx = __cpuid_has_avx(); #endif @@ -453,11 +632,7 @@ zfs_avx2_available(void) { boolean_t has_avx2; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX2) && defined(KERNEL_EXPORTS_X86_FPU) has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2); -#else - has_avx2 = B_FALSE; -#endif #elif !defined(_KERNEL) has_avx2 = __cpuid_has_avx2(); #endif @@ -472,7 +647,7 @@ static inline boolean_t zfs_bmi1_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_BMI1) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_BMI1) return (!!boot_cpu_has(X86_FEATURE_BMI1)); #else return (B_FALSE); @@ -489,7 +664,7 @@ static inline boolean_t zfs_bmi2_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_BMI2) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_BMI2) return (!!boot_cpu_has(X86_FEATURE_BMI2)); #else return (B_FALSE); @@ -506,7 +681,7 @@ static inline boolean_t zfs_aes_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_AES) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AES) return (!!boot_cpu_has(X86_FEATURE_AES)); #else return (B_FALSE); @@ -523,7 +698,7 @@ static inline boolean_t 
zfs_pclmulqdq_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_PCLMULQDQ) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_PCLMULQDQ) return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ)); #else return (B_FALSE); @@ -557,7 +732,7 @@ zfs_avx512f_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512F) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512F) has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F); #else has_avx512 = B_FALSE; @@ -576,7 +751,7 @@ zfs_avx512cd_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512CD) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512CD) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512CD); #else @@ -596,7 +771,7 @@ zfs_avx512er_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512ER) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512ER) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512ER); #else @@ -616,7 +791,7 @@ zfs_avx512pf_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512PF) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512PF) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512PF); #else @@ -636,7 +811,7 @@ zfs_avx512bw_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512BW) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512BW) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512BW); #else @@ -656,7 +831,7 @@ zfs_avx512dq_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512DQ) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512DQ) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && 
boot_cpu_has(X86_FEATURE_AVX512DQ); #else @@ -676,7 +851,7 @@ zfs_avx512vl_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512VL) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512VL) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL); #else @@ -696,7 +871,7 @@ zfs_avx512ifma_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512IFMA) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512IFMA) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512IFMA); #else @@ -716,7 +891,7 @@ zfs_avx512vbmi_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512VBMI) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512VBMI) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VBMI); #else diff --git a/include/sys/vdev_raidz.h b/include/sys/vdev_raidz.h index 2ce32469d47..0ce2b5ea1d6 100644 --- a/include/sys/vdev_raidz.h +++ b/include/sys/vdev_raidz.h @@ -51,7 +51,7 @@ int vdev_raidz_reconstruct(struct raidz_map *, const int *, int); */ void vdev_raidz_math_init(void); void vdev_raidz_math_fini(void); -struct raidz_impl_ops *vdev_raidz_math_get_ops(void); +const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); int vdev_raidz_math_generate(struct raidz_map *); int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *, const int); diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h index 94960ba957c..2e38962cc31 100644 --- a/include/sys/vdev_raidz_impl.h +++ b/include/sys/vdev_raidz_impl.h @@ -126,7 +126,7 @@ typedef struct raidz_map { uintptr_t rm_reports; /* # of referencing checksum reports */ uint8_t rm_freed; /* map no longer has referencing ZIO */ uint8_t rm_ecksuminjected; /* checksum error was injected */ - raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ + 
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ } raidz_map_t; diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index 36e0686a51c..fe15d76d16a 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Initialize AES encryption and decryption key schedules. @@ -40,9 +41,9 @@ void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched) { - aes_impl_ops_t *ops = aes_impl_get_ops(); - aes_key_t *newbie = keysched; - uint_t keysize, i, j; + const aes_impl_ops_t *ops = aes_impl_get_ops(); + aes_key_t *newbie = keysched; + uint_t keysize, i, j; union { uint64_t ka64[4]; uint32_t ka32[8]; @@ -252,12 +253,17 @@ static size_t aes_supp_impl_cnt = 0; static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)]; /* - * Selects the aes operations for encrypt/decrypt/key setup + * Returns the AES operations for encrypt/decrypt/key setup. When a + * SIMD implementation is not allowed in the current context, then + * fallback to the fastest generic implementation. 
*/ -aes_impl_ops_t * -aes_impl_get_ops() +const aes_impl_ops_t * +aes_impl_get_ops(void) { - aes_impl_ops_t *ops = NULL; + if (!kfpu_allowed()) + return (&aes_generic_impl); + + const aes_impl_ops_t *ops = NULL; const uint32_t impl = AES_IMPL_READ(icp_aes_impl); switch (impl) { @@ -266,15 +272,13 @@ aes_impl_get_ops() ops = &aes_fastest_impl; break; case IMPL_CYCLE: - { + /* Cycle through supported implementations */ ASSERT(aes_impl_initialized); ASSERT3U(aes_supp_impl_cnt, >, 0); - /* Cycle through supported implementations */ static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt; ops = aes_supp_impl[idx]; - } - break; + break; default: ASSERT3U(impl, <, aes_supp_impl_cnt); ASSERT3U(aes_supp_impl_cnt, >, 0); @@ -288,13 +292,16 @@ aes_impl_get_ops() return (ops); } +/* + * Initialize all supported implementations. + */ void aes_impl_init(void) { aes_impl_ops_t *curr_impl; int i, c; - /* move supported impl into aes_supp_impls */ + /* Move supported implementations into aes_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) { curr_impl = (aes_impl_ops_t *)aes_all_impl[i]; diff --git a/module/icp/algs/aes/aes_impl_aesni.c b/module/icp/algs/aes/aes_impl_aesni.c index 97f7c3a4781..222c176aaba 100644 --- a/module/icp/algs/aes/aes_impl_aesni.c +++ b/module/icp/algs/aes/aes_impl_aesni.c @@ -108,7 +108,7 @@ aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4], static boolean_t aes_aesni_will_work(void) { - return (zfs_aes_available()); + return (kfpu_allowed() && zfs_aes_available()); } const aes_impl_ops_t aes_aesni_impl = { diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 0afd957f0cf..efbf0fea969 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -29,6 +29,7 @@ #include #include #include +#include #define GHASH(c, d, t, o) \ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ @@ -46,7 +47,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char 
*data, size_t length, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; @@ -168,7 +169,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint8_t *ghash, *macp = NULL; int i, rv; @@ -320,7 +321,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; size_t pt_len; size_t remainder; uint8_t *ghash; @@ -427,7 +428,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; uint8_t *cb; ulong_t remainder = iv_len; ulong_t processed = 0; @@ -481,7 +482,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; uint8_t *ghash, *datap, *authp; size_t remainder, processed; @@ -660,12 +661,17 @@ static size_t gcm_supp_impl_cnt = 0; static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; /* - * Selects the gcm operation + * Returns the GCM operations for encrypt/decrypt/key setup. When a + * SIMD implementation is not allowed in the current context, then + * fallback to the fastest generic implementation. 
*/ -gcm_impl_ops_t * +const gcm_impl_ops_t * gcm_impl_get_ops() { - gcm_impl_ops_t *ops = NULL; + if (!kfpu_allowed()) + return (&gcm_generic_impl); + + const gcm_impl_ops_t *ops = NULL; const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); switch (impl) { @@ -674,15 +680,13 @@ gcm_impl_get_ops() ops = &gcm_fastest_impl; break; case IMPL_CYCLE: - { + /* Cycle through supported implementations */ ASSERT(gcm_impl_initialized); ASSERT3U(gcm_supp_impl_cnt, >, 0); - /* Cycle through supported implementations */ static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; ops = gcm_supp_impl[idx]; - } - break; + break; default: ASSERT3U(impl, <, gcm_supp_impl_cnt); ASSERT3U(gcm_supp_impl_cnt, >, 0); @@ -696,13 +700,16 @@ gcm_impl_get_ops() return (ops); } +/* + * Initialize all supported implementations. + */ void gcm_impl_init(void) { gcm_impl_ops_t *curr_impl; int i, c; - /* move supported impl into aes_supp_impls */ + /* Move supported implementations into gcm_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) { curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i]; @@ -711,7 +718,10 @@ gcm_impl_init(void) } gcm_supp_impl_cnt = c; - /* set fastest implementation. assume hardware accelerated is fastest */ + /* + * Set the fastest implementation given the assumption that the + * hardware accelerated version is the fastest. 
+ */ #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) if (gcm_pclmulqdq_impl.is_supported()) { memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, diff --git a/module/icp/algs/modes/gcm_pclmulqdq.c b/module/icp/algs/modes/gcm_pclmulqdq.c index be00ba37b6a..8a43ba33a6e 100644 --- a/module/icp/algs/modes/gcm_pclmulqdq.c +++ b/module/icp/algs/modes/gcm_pclmulqdq.c @@ -52,7 +52,7 @@ gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res) static boolean_t gcm_pclmulqdq_will_work(void) { - return (zfs_pclmulqdq_available()); + return (kfpu_allowed() && zfs_pclmulqdq_available()); } const gcm_impl_ops_t gcm_pclmulqdq_impl = { diff --git a/module/icp/include/aes/aes_impl.h b/module/icp/include/aes/aes_impl.h index 3a3de91cf6a..a0b82ade455 100644 --- a/module/icp/include/aes/aes_impl.h +++ b/module/icp/include/aes/aes_impl.h @@ -201,9 +201,9 @@ extern const aes_impl_ops_t aes_aesni_impl; void aes_impl_init(void); /* - * Get selected aes implementation + * Returns optimal allowed AES implementation */ -struct aes_impl_ops *aes_impl_get_ops(void); +const struct aes_impl_ops *aes_impl_get_ops(void); #ifdef __cplusplus } diff --git a/module/icp/include/modes/gcm_impl.h b/module/icp/include/modes/gcm_impl.h index b78cc8aab01..28c8f63a7d4 100644 --- a/module/icp/include/modes/gcm_impl.h +++ b/module/icp/include/modes/gcm_impl.h @@ -64,9 +64,9 @@ extern const gcm_impl_ops_t gcm_pclmulqdq_impl; void gcm_impl_init(void); /* - * Get selected aes implementation + * Returns optimal allowed GCM implementation */ -struct gcm_impl_ops *gcm_impl_get_ops(void); +const struct gcm_impl_ops *gcm_impl_get_ops(void); #ifdef __cplusplus } diff --git a/module/icp/io/aes.c b/module/icp/io/aes.c index 53b19369386..788bcef7d1e 100644 --- a/module/icp/io/aes.c +++ b/module/icp/io/aes.c @@ -206,7 +206,7 @@ aes_mod_init(void) { int ret; - /* find fastest implementations and set any requested implementations */ + /* Determine the fastest available implementation. 
*/ aes_impl_init(); gcm_impl_init(); diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index f712ce40c6e..4c9db441b53 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -140,6 +140,7 @@ #include #include #include +#include #define FLETCHER_MIN_SIMD_SIZE 64 @@ -205,21 +206,19 @@ static struct fletcher_4_impl_selector { const char *fis_name; uint32_t fis_sel; } fletcher_4_impl_selectors[] = { -#if !defined(_KERNEL) { "cycle", IMPL_CYCLE }, -#endif { "fastest", IMPL_FASTEST }, { "scalar", IMPL_SCALAR } }; #if defined(_KERNEL) static kstat_t *fletcher_4_kstat; -#endif static struct fletcher_4_kstat { uint64_t native; uint64_t byteswap; } fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1]; +#endif /* Indicate that benchmark has been completed */ static boolean_t fletcher_4_initialized = B_FALSE; @@ -408,32 +407,36 @@ fletcher_4_impl_set(const char *val) return (err); } +/* + * Returns the Fletcher 4 operations for checksums. When a SIMD + * implementation is not allowed in the current context, then fallback + * to the fastest generic implementation. 
+ */ static inline const fletcher_4_ops_t * fletcher_4_impl_get(void) { - fletcher_4_ops_t *ops = NULL; - const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); + if (!kfpu_allowed()) + return (&fletcher_4_superscalar4_ops); + + const fletcher_4_ops_t *ops = NULL; + uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); switch (impl) { case IMPL_FASTEST: ASSERT(fletcher_4_initialized); ops = &fletcher_4_fastest_impl; break; -#if !defined(_KERNEL) - case IMPL_CYCLE: { + case IMPL_CYCLE: + /* Cycle through supported implementations */ ASSERT(fletcher_4_initialized); ASSERT3U(fletcher_4_supp_impls_cnt, >, 0); - static uint32_t cycle_count = 0; uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt; ops = fletcher_4_supp_impls[idx]; - } - break; -#endif + break; default: ASSERT3U(fletcher_4_supp_impls_cnt, >, 0); ASSERT3U(impl, <, fletcher_4_supp_impls_cnt); - ops = fletcher_4_supp_impls[impl]; break; } @@ -659,6 +662,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n) typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *, zio_cksum_t *); +#if defined(_KERNEL) static void fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) { @@ -716,16 +720,18 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) /* restore original selection */ atomic_swap_32(&fletcher_4_impl_chosen, sel_save); } +#endif /* _KERNEL */ -void -fletcher_4_init(void) +/* + * Initialize and benchmark all supported implementations. 
+ */ +static void +fletcher_4_benchmark(void) { - static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */ fletcher_4_ops_t *curr_impl; - char *databuf; int i, c; - /* move supported impl into fletcher_4_supp_impls */ + /* Move supported implementations into fletcher_4_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) { curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i]; @@ -735,19 +741,10 @@ fletcher_4_init(void) membar_producer(); /* complete fletcher_4_supp_impls[] init */ fletcher_4_supp_impls_cnt = c; /* number of supported impl */ -#if !defined(_KERNEL) - /* Skip benchmarking and use last implementation as fastest */ - memcpy(&fletcher_4_fastest_impl, - fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1], - sizeof (fletcher_4_fastest_impl)); - fletcher_4_fastest_impl.name = "fastest"; - membar_producer(); +#if defined(_KERNEL) + static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */ + char *databuf = vmem_alloc(data_size, KM_SLEEP); - fletcher_4_initialized = B_TRUE; - return; -#endif - /* Benchmark all supported implementations */ - databuf = vmem_alloc(data_size, KM_SLEEP); for (i = 0; i < data_size / sizeof (uint64_t); i++) ((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */ @@ -755,9 +752,28 @@ fletcher_4_init(void) fletcher_4_benchmark_impl(B_TRUE, databuf, data_size); vmem_free(databuf, data_size); +#else + /* + * Skip the benchmark in user space to avoid impacting libzpool + * consumers (zdb, zhack, zinject, ztest). The last implementation + * is assumed to be the fastest and used by default. + */ + memcpy(&fletcher_4_fastest_impl, + fletcher_4_supp_impls[fletcher_4_supp_impls_cnt - 1], + sizeof (fletcher_4_fastest_impl)); + fletcher_4_fastest_impl.name = "fastest"; + membar_producer(); +#endif /* _KERNEL */ +} + +void +fletcher_4_init(void) +{ + /* Determine the fastest available implementation. 
*/ + fletcher_4_benchmark(); #if defined(_KERNEL) - /* install kstats for all implementations */ + /* Install kstats for all implementations */ fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); if (fletcher_4_kstat != NULL) { diff --git a/module/zcommon/zfs_fletcher_aarch64_neon.c b/module/zcommon/zfs_fletcher_aarch64_neon.c index bd2db2b20fe..3b3c1b52b80 100644 --- a/module/zcommon/zfs_fletcher_aarch64_neon.c +++ b/module/zcommon/zfs_fletcher_aarch64_neon.c @@ -198,7 +198,7 @@ unsigned char SRC __attribute__((vector_size(16))); static boolean_t fletcher_4_aarch64_neon_valid(void) { - return (B_TRUE); + return (kfpu_allowed()); } const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = { diff --git a/module/zcommon/zfs_fletcher_avx512.c b/module/zcommon/zfs_fletcher_avx512.c index 7260a9864be..0d4cff21a50 100644 --- a/module/zcommon/zfs_fletcher_avx512.c +++ b/module/zcommon/zfs_fletcher_avx512.c @@ -157,7 +157,7 @@ STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap); static boolean_t fletcher_4_avx512f_valid(void) { - return (zfs_avx512f_available()); + return (kfpu_allowed() && zfs_avx512f_available()); } const fletcher_4_ops_t fletcher_4_avx512f_ops = { diff --git a/module/zcommon/zfs_fletcher_intel.c b/module/zcommon/zfs_fletcher_intel.c index 6dac047dad0..7f12efe6d8c 100644 --- a/module/zcommon/zfs_fletcher_intel.c +++ b/module/zcommon/zfs_fletcher_intel.c @@ -156,7 +156,7 @@ fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size) static boolean_t fletcher_4_avx2_valid(void) { - return (zfs_avx_available() && zfs_avx2_available()); + return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available()); } const fletcher_4_ops_t fletcher_4_avx2_ops = { diff --git a/module/zcommon/zfs_fletcher_sse.c b/module/zcommon/zfs_fletcher_sse.c index a0b42e5f5fa..e6389d6e5db 100644 --- a/module/zcommon/zfs_fletcher_sse.c +++ b/module/zcommon/zfs_fletcher_sse.c @@ -157,7 +157,7 @@ 
fletcher_4_sse2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size) static boolean_t fletcher_4_sse2_valid(void) { - return (zfs_sse2_available()); + return (kfpu_allowed() && zfs_sse2_available()); } const fletcher_4_ops_t fletcher_4_sse2_ops = { @@ -214,7 +214,8 @@ fletcher_4_ssse3_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size) static boolean_t fletcher_4_ssse3_valid(void) { - return (zfs_sse2_available() && zfs_ssse3_available()); + return (kfpu_allowed() && zfs_sse2_available() && + zfs_ssse3_available()); } const fletcher_4_ops_t fletcher_4_ssse3_ops = { diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index dab749138a6..f1c4158388f 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -853,10 +853,23 @@ zfs_prop_align_right(zfs_prop_t prop) #endif #if defined(_KERNEL) + +#include + +#if defined(HAVE_KERNEL_FPU_INTERNAL) +union fpregs_state **zfs_kfpu_fpregs; +EXPORT_SYMBOL(zfs_kfpu_fpregs); +#endif /* HAVE_KERNEL_FPU_INTERNAL */ + static int __init zcommon_init(void) { + int error = kfpu_init(); + if (error) + return (error); + fletcher_4_init(); + return (0); } @@ -864,6 +877,7 @@ static void __exit zcommon_fini(void) { fletcher_4_fini(); + kfpu_fini(); } module_init(zcommon_init); diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index 3ef67768f91..576d33befaa 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -27,9 +27,9 @@ #include #include #include - #include #include +#include extern boolean_t raidz_will_scalar_work(void); @@ -87,6 +87,7 @@ static uint32_t user_sel_impl = IMPL_FASTEST; static size_t raidz_supp_impl_cnt = 0; static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)]; +#if defined(_KERNEL) /* * kstats values for supported implementations * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s] @@ -95,14 +96,19 @@ static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1]; /* 
kstat for benchmarked implementations */ static kstat_t *raidz_math_kstat = NULL; +#endif /* - * Selects the raidz operation for raidz_map - * If rm_ops is set to NULL original raidz implementation will be used + * Returns the RAIDZ operations for raidz_map() parity calculations. When + * a SIMD implementation is not allowed in the current context, then fallback + * to the fastest generic implementation. */ -raidz_impl_ops_t * -vdev_raidz_math_get_ops() +const raidz_impl_ops_t * +vdev_raidz_math_get_ops(void) { + if (!kfpu_allowed()) + return (&vdev_raidz_scalar_impl); + raidz_impl_ops_t *ops = NULL; const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl); @@ -111,18 +117,14 @@ vdev_raidz_math_get_ops() ASSERT(raidz_math_initialized); ops = &vdev_raidz_fastest_impl; break; -#if !defined(_KERNEL) case IMPL_CYCLE: - { + /* Cycle through all supported implementations */ ASSERT(raidz_math_initialized); ASSERT3U(raidz_supp_impl_cnt, >, 0); - /* Cycle through all supported implementations */ static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt; ops = raidz_supp_impl[idx]; - } - break; -#endif + break; case IMPL_ORIGINAL: ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl; break; @@ -273,6 +275,8 @@ const char *raidz_rec_name[] = { "rec_pq", "rec_pr", "rec_qr", "rec_pqr" }; +#if defined(_KERNEL) + #define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1) static int @@ -435,21 +439,21 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn) } } } +#endif -void -vdev_raidz_math_init(void) +/* + * Initialize and benchmark all supported implementations. 
+ */ +static void +benchmark_raidz(void) { raidz_impl_ops_t *curr_impl; - zio_t *bench_zio = NULL; - raidz_map_t *bench_rm = NULL; - uint64_t bench_parity; - int i, c, fn; + int i, c; - /* move supported impl into raidz_supp_impl */ + /* Move supported impl into raidz_supp_impl */ for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i]; - /* initialize impl */ if (curr_impl->init) curr_impl->init(); @@ -459,18 +463,10 @@ vdev_raidz_math_init(void) membar_producer(); /* complete raidz_supp_impl[] init */ raidz_supp_impl_cnt = c; /* number of supported impl */ -#if !defined(_KERNEL) - /* Skip benchmarking and use last implementation as fastest */ - memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1], - sizeof (vdev_raidz_fastest_impl)); - strcpy(vdev_raidz_fastest_impl.name, "fastest"); - - raidz_math_initialized = B_TRUE; - - /* Use 'cycle' math selection method for userspace */ - VERIFY0(vdev_raidz_impl_set("cycle")); - return; -#endif +#if defined(_KERNEL) + zio_t *bench_zio = NULL; + raidz_map_t *bench_rm = NULL; + uint64_t bench_parity; /* Fake a zio and run the benchmark on a warmed up buffer */ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP); @@ -480,7 +476,7 @@ vdev_raidz_math_init(void) memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE); /* Benchmark parity generation methods */ - for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) { + for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) { bench_parity = fn + 1; /* New raidz_map is needed for each generate_p/q/r */ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT, @@ -495,7 +491,7 @@ vdev_raidz_math_init(void) bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT, BENCH_COLS, PARITY_PQR); - for (fn = 0; fn < RAIDZ_REC_NUM; fn++) + for (int fn = 0; fn < RAIDZ_REC_NUM; fn++) benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl); vdev_raidz_map_free(bench_rm); @@ -503,11 +499,29 @@ vdev_raidz_math_init(void) /* cleanup the bench 
zio */ abd_free(bench_zio->io_abd); kmem_free(bench_zio, sizeof (zio_t)); +#else + /* + * Skip the benchmark in user space to avoid impacting libzpool + * consumers (zdb, zhack, zinject, ztest). The last implementation + * is assumed to be the fastest and used by default. + */ + memcpy(&vdev_raidz_fastest_impl, + raidz_supp_impl[raidz_supp_impl_cnt - 1], + sizeof (vdev_raidz_fastest_impl)); + strcpy(vdev_raidz_fastest_impl.name, "fastest"); +#endif /* _KERNEL */ +} - /* install kstats for all impl */ +void +vdev_raidz_math_init(void) +{ + /* Determine the fastest available implementation. */ + benchmark_raidz(); + +#if defined(_KERNEL) + /* Install kstats for all implementations */ raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - if (raidz_math_kstat != NULL) { raidz_math_kstat->ks_data = NULL; raidz_math_kstat->ks_ndata = UINT32_MAX; @@ -517,6 +531,7 @@ vdev_raidz_math_init(void) raidz_math_kstat_addr); kstat_install(raidz_math_kstat); } +#endif /* Finish initialization */ atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl); @@ -527,15 +542,15 @@ void vdev_raidz_math_fini(void) { raidz_impl_ops_t const *curr_impl; - int i; +#if defined(_KERNEL) if (raidz_math_kstat != NULL) { kstat_delete(raidz_math_kstat); raidz_math_kstat = NULL; } +#endif - /* fini impl */ - for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { + for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { curr_impl = raidz_all_maths[i]; if (curr_impl->fini) curr_impl->fini(); @@ -546,9 +561,7 @@ static const struct { char *name; uint32_t sel; } math_impl_opts[] = { -#if !defined(_KERNEL) { "cycle", IMPL_CYCLE }, -#endif { "fastest", IMPL_FASTEST }, { "original", IMPL_ORIGINAL }, { "scalar", IMPL_SCALAR } diff --git a/module/zfs/vdev_raidz_math_aarch64_neon.c b/module/zfs/vdev_raidz_math_aarch64_neon.c index e3ad0677650..0a67ceb8492 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon.c +++ b/module/zfs/vdev_raidz_math_aarch64_neon.c @@ 
-207,7 +207,7 @@ DEFINE_REC_METHODS(aarch64_neon); static boolean_t raidz_will_aarch64_neon_work(void) { - return (B_TRUE); // __arch64__ requires NEON + return (kfpu_allowed()); } const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = { diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c index f8688a06a8f..e072f51cd63 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c +++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c @@ -217,7 +217,7 @@ DEFINE_REC_METHODS(aarch64_neonx2); static boolean_t raidz_will_aarch64_neonx2_work(void) { - return (B_TRUE); // __arch64__ requires NEON + return (kfpu_allowed()); } const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = { diff --git a/module/zfs/vdev_raidz_math_avx2.c b/module/zfs/vdev_raidz_math_avx2.c index 063d29bcd8b..a12eb672081 100644 --- a/module/zfs/vdev_raidz_math_avx2.c +++ b/module/zfs/vdev_raidz_math_avx2.c @@ -396,7 +396,7 @@ DEFINE_REC_METHODS(avx2); static boolean_t raidz_will_avx2_work(void) { - return (zfs_avx_available() && zfs_avx2_available()); + return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available()); } const raidz_impl_ops_t vdev_raidz_avx2_impl = { diff --git a/module/zfs/vdev_raidz_math_avx512bw.c b/module/zfs/vdev_raidz_math_avx512bw.c index d605653db3f..2f545c9ec07 100644 --- a/module/zfs/vdev_raidz_math_avx512bw.c +++ b/module/zfs/vdev_raidz_math_avx512bw.c @@ -393,9 +393,8 @@ DEFINE_REC_METHODS(avx512bw); static boolean_t raidz_will_avx512bw_work(void) { - return (zfs_avx_available() && - zfs_avx512f_available() && - zfs_avx512bw_available()); + return (kfpu_allowed() && zfs_avx_available() && + zfs_avx512f_available() && zfs_avx512bw_available()); } const raidz_impl_ops_t vdev_raidz_avx512bw_impl = { diff --git a/module/zfs/vdev_raidz_math_avx512f.c b/module/zfs/vdev_raidz_math_avx512f.c index f4e4560ced8..75af7a8eea9 100644 --- a/module/zfs/vdev_raidz_math_avx512f.c +++ b/module/zfs/vdev_raidz_math_avx512f.c @@ -470,9 +470,8 @@ 
DEFINE_REC_METHODS(avx512f); static boolean_t raidz_will_avx512f_work(void) { - return (zfs_avx_available() && - zfs_avx2_available() && - zfs_avx512f_available()); + return (kfpu_allowed() && zfs_avx_available() && + zfs_avx2_available() && zfs_avx512f_available()); } const raidz_impl_ops_t vdev_raidz_avx512f_impl = { diff --git a/module/zfs/vdev_raidz_math_sse2.c b/module/zfs/vdev_raidz_math_sse2.c index 9985da27364..5b3a9385c9d 100644 --- a/module/zfs/vdev_raidz_math_sse2.c +++ b/module/zfs/vdev_raidz_math_sse2.c @@ -607,7 +607,7 @@ DEFINE_REC_METHODS(sse2); static boolean_t raidz_will_sse2_work(void) { - return (zfs_sse_available() && zfs_sse2_available()); + return (kfpu_allowed() && zfs_sse_available() && zfs_sse2_available()); } const raidz_impl_ops_t vdev_raidz_sse2_impl = { diff --git a/module/zfs/vdev_raidz_math_ssse3.c b/module/zfs/vdev_raidz_math_ssse3.c index 047a48d544f..62247cf8eb8 100644 --- a/module/zfs/vdev_raidz_math_ssse3.c +++ b/module/zfs/vdev_raidz_math_ssse3.c @@ -399,8 +399,8 @@ DEFINE_REC_METHODS(ssse3); static boolean_t raidz_will_ssse3_work(void) { - return (zfs_sse_available() && zfs_sse2_available() && - zfs_ssse3_available()); + return (kfpu_allowed() && zfs_sse_available() && + zfs_sse2_available() && zfs_ssse3_available()); } const raidz_impl_ops_t vdev_raidz_ssse3_impl = { diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index 7cf20f4136b..7ce2b1bf407 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -549,12 +549,12 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key) { - int ret; crypto_mechanism_t mech; uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint_t enc_len, keydata_len, aad_len; + int ret; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); From 35155c013240ce14860b43ebc4803c2a5eea78f8 Mon Sep 17 
00:00:00 2001 From: Fabian-Gruenbichler Date: Tue, 10 Dec 2019 21:53:25 +0100 Subject: [PATCH 231/325] SIMD: Use alloc_pages_node to force alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fxsave and xsave require the target address to be 16-/64-byte aligned. kmalloc(_node) does not (yet) offer such fine-grained control over alignment[0,1], even though it does "the right thing" most of the time for power-of-2 sizes. unfortunately, alignment is completely off when using certain debugging or hardening features/configs, such as KASAN, slub_debug=Z or the not-yet-upstream SLAB_CANARY. Use alloc_pages_node() instead which allows us to allocate page-aligned memory. Since fpregs_state is padded to a full page anyway, and this code is only relevant for x86 which has 4k pages, this approach should not allocate any unnecessary memory but still guarantee the needed alignment. 0: https://lwn.net/Articles/787740/ 1: https://lore.kernel.org/linux-block/20190826111627.7505-1-vbabka@suse.cz/ Reviewed-by: Tony Hutter Signed-off-by: Fabian Grünbichler Signed-off-by: Brian Behlendorf Closes #9608 Closes #9674 --- include/linux/simd_x86.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h index 69dbd557906..1bde1d7c924 100644 --- a/include/linux/simd_x86.h +++ b/include/linux/simd_x86.h @@ -144,6 +144,8 @@ */ #if defined(HAVE_KERNEL_FPU_INTERNAL) +#include + extern union fpregs_state **zfs_kfpu_fpregs; /* @@ -156,7 +158,8 @@ kfpu_fini(void) for_each_possible_cpu(cpu) { if (zfs_kfpu_fpregs[cpu] != NULL) { - kfree(zfs_kfpu_fpregs[cpu]); + free_pages((unsigned long)zfs_kfpu_fpregs[cpu], + get_order(sizeof (union fpregs_state))); } } @@ -166,20 +169,28 @@ kfpu_fini(void) static inline int kfpu_init(void) { - int cpu; - zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * sizeof (union fpregs_state *), GFP_KERNEL); if (zfs_kfpu_fpregs == NULL) return 
(-ENOMEM); + /* + * The fxsave and xsave operations require 16-/64-byte alignment of + * the target memory. Since kmalloc() provides no alignment + * guarantee instead use alloc_pages_node(). + */ + unsigned int order = get_order(sizeof (union fpregs_state)); + int cpu; + for_each_possible_cpu(cpu) { - zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (union fpregs_state), - GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); - if (zfs_kfpu_fpregs[cpu] == NULL) { + struct page *page = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL | __GFP_ZERO, order); + if (page == NULL) { kfpu_fini(); return (-ENOMEM); } + + zfs_kfpu_fpregs[cpu] = page_address(page); } return (0); From ff3e2e3c7096200c4d4771f724762b15e1484259 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 1 Oct 2019 12:50:34 -0700 Subject: [PATCH 232/325] Perform KABI checks in parallel Reduce the time required for ./configure to perform the needed KABI checks by allowing kbuild to compile multiple test cases in parallel. This was accomplished by splitting each test's source code from the logic handling whether that code could be compiled or not. By introducing this split it's possible to minimize the number of times kbuild needs to be invoked. As importantly, it means all of the tests can be built in parallel. This does require a little extra care since we expect some tests to fail, so the --keep-going (-k) option must be provided otherwise some tests may not get compiled. Furthermore, since a failure during the kbuild modpost phase will result in an early exit; the final linking phase is limited to tests which passed the initial compilation and produced an object file. Once everything has been built the configure script proceeds as previously. The only significant difference is that it now merely needs to test for the existence of a .ko file to determine the result of a given test. This vastly speeds up the entire process. 
New test cases should use ZFS_LINUX_TEST_SRC to declare their test source code and ZFS_LINUX_TEST_RESULT to check the result. All of the existing kernel-*.m4 files have been updated accordingly, see config/kernel-current-time.m4 for a basic example. The legacy ZFS_LINUX_TRY_COMPILE macro has been kept to handle special cases but it's use is not encouraged. master (secs) patched (secs) ------------- ---------------- autogen.sh 61 68 configure 137 24 (~17% of current run time) make -j $(nproc) 44 44 make rpms 287 150 Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #8547 Closes #9132 Closes #9341 Conflicts: Makefile.am config/kernel-fpu.m4 --- .gitignore | 1 + Makefile.am | 9 +- config/iconv.m4 | 3 +- config/kernel-access-ok-type.m4 | 18 +- config/kernel-acl.m4 | 253 ++++-- config/kernel-aio-fsync.m4 | 14 +- config/kernel-automount.m4 | 13 +- config/kernel-bdev-logical-size.m4 | 17 +- config/kernel-bdev-physical-size.m4 | 17 +- config/kernel-bdi.m4 | 78 +- config/kernel-bio-bvec-iter.m4 | 13 +- config/kernel-bio-end-io-t-args.m4 | 34 +- config/kernel-bio-failfast.m4 | 39 +- config/kernel-bio-op.m4 | 74 +- config/kernel-bio-rw-barrier.m4 | 15 +- config/kernel-bio-rw-discard.m4 | 15 +- config/kernel-bio_set_dev.m4 | 51 +- config/kernel-blk-queue-bdi.m4 | 12 +- config/kernel-blk-queue-discard.m4 | 41 +- config/kernel-blk-queue-flags.m4 | 40 +- config/kernel-blk-queue-flush.m4 | 64 +- config/kernel-blk-queue-max-hw-sectors.m4 | 19 +- config/kernel-blk-queue-max-segments.m4 | 21 +- config/kernel-blk-queue-unplug.m4 | 48 +- config/kernel-blkdev-get-by-path.m4 | 15 +- config/kernel-blkdev-reread-part.m4 | 12 +- config/kernel-block-device-operations.m4 | 46 +- config/kernel-clear-inode.m4 | 13 +- config/kernel-commit-metadata.m4 | 15 +- config/kernel-config-defined.m4 | 183 ++++ config/kernel-create-nameidata.m4 | 15 +- config/kernel-ctl-table-name.m4 | 12 +- config/kernel-current-time.m4 | 13 +- config/kernel-current_bio_tail.m4 | 30 +- 
config/kernel-d-make-root.m4 | 17 - config/kernel-d-obtain-alias.m4 | 18 - config/kernel-d-prune-aliases.m4 | 19 - config/kernel-declare-event-class.m4 | 8 +- config/kernel-dentry-operations.m4 | 174 +++- config/kernel-dirty-inode.m4 | 15 +- config/kernel-discard-granularity.m4 | 15 +- config/kernel-elevator-change.m4 | 21 +- config/kernel-encode-fh-inode.m4 | 15 +- config/kernel-evict-inode.m4 | 13 +- config/kernel-fallocate-pax.m4 | 19 - config/kernel-fallocate.m4 | 50 +- config/kernel-file-dentry.m4 | 12 +- config/kernel-file-inode.m4 | 12 +- config/kernel-fmode-t.m4 | 15 +- config/kernel-follow-down-one.m4 | 12 +- config/kernel-fpu.m4 | 140 ++-- config/kernel-fst-mount.m4 | 13 +- config/kernel-fsync.m4 | 83 +- config/kernel-generic_io_acct.m4 | 63 +- config/kernel-generic_readlink.m4 | 15 +- config/kernel-get-disk-and-module.m4 | 13 +- config/kernel-get-disk-ro.m4 | 18 +- config/kernel-get-link.m4 | 126 +-- config/kernel-global_page_state.m4 | 72 +- config/kernel-group-info.m4 | 15 +- config/kernel-in-compat-syscall.m4 | 12 +- config/kernel-inode-getattr.m4 | 56 +- config/kernel-inode-lock.m4 | 15 +- config/kernel-inode-set-flags.m4 | 12 +- config/kernel-inode-set-iversion.m4 | 12 +- config/kernel-inode-times.m4 | 15 +- config/kernel-insert-inode-locked.m4 | 15 +- config/kernel-invalidate-bdev-args.m4 | 14 +- config/kernel-is_owner_or_cap.m4 | 31 +- config/kernel-kmap-atomic-args.m4 | 14 +- config/kernel-kmem-cache.m4 | 62 +- config/kernel-kstrtoul.m4 | 14 +- config/kernel-ktime_get_coarse_real_ts64.m4 | 15 +- config/kernel-kuid-helpers.m4 | 12 +- config/kernel-kuidgid.m4 | 26 +- config/kernel-lookup-bdev.m4 | 32 +- config/kernel-lookup-nameidata.m4 | 15 +- config/kernel-lseek-execute.m4 | 16 +- config/kernel-make-request-fn.m4 | 77 ++ config/kernel-misc-minor.m4 | 2 +- config/kernel-mk-request-fn.m4 | 65 -- config/kernel-mkdir-umode-t.m4 | 13 +- config/kernel-mod-param.m4 | 13 +- config/kernel-objtool.m4 | 47 +- config/kernel-open-bdev-exclusive.m4 | 15 +- 
config/kernel-pde-data.m4 | 14 +- config/kernel-put-link.m4 | 57 +- config/kernel-rename.m4 | 16 +- config/kernel-rw.m4 | 40 +- config/kernel-rwsem.m4 | 65 +- config/kernel-sched.m4 | 53 +- config/kernel-security-inode-init.m4 | 38 +- config/kernel-set-nlink.m4 | 15 +- config/kernel-setattr-prepare.m4 | 16 +- config/kernel-sget-args.m4 | 13 +- config/kernel-show-options.m4 | 21 +- config/kernel-shrink.m4 | 219 +++-- config/kernel-submit_bio.m4 | 12 +- config/kernel-super-userns.m4 | 14 +- config/kernel-timer.m4 | 52 +- config/kernel-tmpfile.m4 | 16 +- config/kernel-totalhigh_pages.m4 | 14 +- config/kernel-totalram-pages-func.m4 | 15 +- config/kernel-truncate-range.m4 | 13 +- config/kernel-truncate-setsize.m4 | 15 +- config/kernel-userns-capabilities.m4 | 48 +- ...urange-sleep.m4 => kernel-usleep_range.m4} | 19 +- config/kernel-vfs-direct_IO.m4 | 145 ++-- config/kernel-vfs-fsync.m4 | 12 +- config/kernel-vfs-getattr.m4 | 54 +- config/kernel-vfs-iterate.m4 | 84 +- config/kernel-vfs-rw-iterate.m4 | 76 +- config/kernel-wait.m4 | 73 +- config/kernel-xattr-handler.m4 | 442 +++++----- config/kernel-zlib.m4 | 57 +- config/kernel.m4 | 783 +++++++++++------- config/zfs-build.m4 | 11 + config/zfs-meta.m4 | 18 + 118 files changed, 3256 insertions(+), 2160 deletions(-) create mode 100644 config/kernel-config-defined.m4 delete mode 100644 config/kernel-d-make-root.m4 delete mode 100644 config/kernel-d-obtain-alias.m4 delete mode 100644 config/kernel-d-prune-aliases.m4 delete mode 100644 config/kernel-fallocate-pax.m4 create mode 100644 config/kernel-make-request-fn.m4 delete mode 100644 config/kernel-mk-request-fn.m4 rename config/{kernel-urange-sleep.m4 => kernel-usleep_range.m4} (60%) diff --git a/.gitignore b/.gitignore index ae9e22dfa7b..19377a7b126 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ Makefile.in # Top level generated files specific to this top level dir # /bin +/build /configure /config.log /config.status diff --git a/Makefile.am b/Makefile.am index 
da4f6407d18..70d9fd7fb6a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -44,8 +44,9 @@ gitrev: BUILT_SOURCES = gitrev -distclean-local: - -$(RM) -R autom4te*.cache +# Double-colon rules are allowed; there are multiple independent definitions. +distclean-local:: + -$(RM) -R autom4te*.cache build -find . \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS \ -o -name .pc -o -name .hg -o -name .git \) -prune -o \ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ @@ -92,8 +93,8 @@ commitcheck: fi cstyle: - @find ${top_srcdir} -name '*.[hc]' ! -name 'zfs_config.*' \ - ! -name '*.mod.c' -type f \ + @find ${top_srcdir} -name build -prune -o -name '*.[hc]' \ + ! -name 'zfs_config.*' ! -name '*.mod.c' -type f \ -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+ shellcheck: diff --git a/config/iconv.m4 b/config/iconv.m4 index a285e9daa5e..fc915fde6c5 100644 --- a/config/iconv.m4 +++ b/config/iconv.m4 @@ -269,8 +269,7 @@ size_t iconv(); [am_cv_proto_iconv_arg1="const"]) am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"]) am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'` - AC_MSG_RESULT([ - $am_cv_proto_iconv]) + AC_MSG_RESULT([$am_cv_proto_iconv]) else dnl When compiling GNU libiconv on a system that does not have iconv yet, dnl pick the POSIX compliant declaration without 'const'. 
diff --git a/config/kernel-access-ok-type.m4 b/config/kernel-access-ok-type.m4 index 3b2878a55cb..dc943345870 100644 --- a/config/kernel-access-ok-type.m4 +++ b/config/kernel-access-ok-type.m4 @@ -4,17 +4,23 @@ dnl # dnl # - access_ok(type, addr, size) dnl # + access_ok(addr, size) dnl # -AC_DEFUN([ZFS_AC_KERNEL_ACCESS_OK_TYPE], [ - AC_MSG_CHECKING([whether access_ok() has 'type' parameter]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE], [ + ZFS_LINUX_TEST_SRC([access_ok_type], [ #include ],[ - const void __user __attribute__((unused)) *addr = (void *) 0xdeadbeef; + const void __user __attribute__((unused)) *addr = + (void *) 0xdeadbeef; unsigned long __attribute__((unused)) size = 1; int error __attribute__((unused)) = access_ok(0, addr, size); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_ACCESS_OK_TYPE], [ + AC_MSG_CHECKING([whether access_ok() has 'type' parameter]) + ZFS_LINUX_TEST_RESULT([access_ok_type], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ACCESS_OK_TYPE, 1, [kernel has access_ok with 'type' parameter]) + AC_DEFINE(HAVE_ACCESS_OK_TYPE, 1, + [kernel has access_ok with 'type' parameter]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4 index 02cc020e5c9..68a72872d81 100644 --- a/config/kernel-acl.m4 +++ b/config/kernel-acl.m4 @@ -3,32 +3,26 @@ dnl # Check if posix_acl_release can be used from a ZFS_META_LICENSED dnl # module. 
The is_owner_or_cap macro was replaced by dnl # inode_owner_or_capable dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [ - AC_MSG_CHECKING([whether posix_acl_release() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE], [ + ZFS_LINUX_TEST_SRC([posix_acl_release], [ #include #include #include - ],[ - struct posix_acl* tmp = posix_acl_alloc(1, 0); + ], [ + struct posix_acl *tmp = posix_acl_alloc(1, 0); posix_acl_release(tmp); - ],[ + ], [], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [ + AC_MSG_CHECKING([whether posix_acl_release() is available]) + ZFS_LINUX_TEST_RESULT([posix_acl_release], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_RELEASE, 1, [posix_acl_release() is available]) AC_MSG_CHECKING([whether posix_acl_release() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct posix_acl* tmp = posix_acl_alloc(1, 0); - posix_acl_release(tmp); - ],[ + ZFS_LINUX_TEST_RESULT([posix_acl_release_license], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) @@ -46,24 +40,25 @@ dnl # set_cached_acl() and forget_cached_acl() changed from inline to dnl # EXPORT_SYMBOL. In the former case, they may not be usable because of dnl # posix_acl_release. In the latter case, we can always use them. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [ - AC_MSG_CHECKING([whether set_cached_acl() is usable]) - ZFS_LINUX_TRY_COMPILE([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE], [ + ZFS_LINUX_TEST_SRC([set_cached_acl], [ #include #include #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ + ], [ struct inode *ip = NULL; struct posix_acl *acl = posix_acl_alloc(1, 0); set_cached_acl(ip, ACL_TYPE_ACCESS, acl); forget_cached_acl(ip, ACL_TYPE_ACCESS); - ],[ + ], [], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [ + AC_MSG_CHECKING([whether set_cached_acl() is usable]) + ZFS_LINUX_TEST_RESULT([set_cached_acl_license], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SET_CACHED_ACL_USABLE, 1, - [posix_acl_release() is usable]) + [set_cached_acl() is usable]) ],[ AC_MSG_RESULT(no) ]) @@ -77,14 +72,25 @@ dnl # dnl # 3.14 API change, dnl # posix_acl_chmod() is changed to __posix_acl_chmod() dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ - AC_MSG_CHECKING([whether posix_acl_chmod exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD], [ + ZFS_LINUX_TEST_SRC([posix_acl_chmod], [ #include #include ],[ posix_acl_chmod(NULL, 0, 0) + ]) + + ZFS_LINUX_TEST_SRC([__posix_acl_chmod], [ + #include + #include ],[ + __posix_acl_chmod(NULL, 0, 0) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ + AC_MSG_CHECKING([whether posix_acl_chmod exists]) + ZFS_LINUX_TEST_RESULT([posix_acl_chmod], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1, [posix_acl_chmod() exists]) ],[ @@ -92,14 +98,10 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ ]) AC_MSG_CHECKING([whether __posix_acl_chmod exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - __posix_acl_chmod(NULL, 0, 0) - ],[ + ZFS_LINUX_TEST_RESULT([__posix_acl_chmod], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE___POSIX_ACL_CHMOD, 1, [__posix_acl_chmod() exists]) + AC_DEFINE(HAVE___POSIX_ACL_CHMOD, 1, + [__posix_acl_chmod() exists]) ],[ AC_MSG_RESULT(no) 
]) @@ -109,18 +111,22 @@ dnl # dnl # 3.1 API change, dnl # posix_acl_equiv_mode now wants an umode_t* instead of a mode_t* dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ - AC_MSG_CHECKING([whether posix_acl_equiv_mode() wants umode_t]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ + ZFS_LINUX_TEST_SRC([posix_acl_equiv_mode], [ #include #include ],[ umode_t tmp; posix_acl_equiv_mode(NULL,&tmp); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ + AC_MSG_CHECKING([whether posix_acl_equiv_mode() wants umode_t]) + ZFS_LINUX_TEST_RESULT([posix_acl_equiv_mode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T, 1, - [ posix_acl_equiv_mode wants umode_t*]) + [posix_acl_equiv_mode wants umode_t*]) ],[ AC_MSG_RESULT(no) ]) @@ -130,9 +136,8 @@ dnl # dnl # 4.8 API change, dnl # The function posix_acl_valid now must be passed a namespace. dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ - AC_MSG_CHECKING([whether posix_acl_valid() wants user namespace]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS], [ + ZFS_LINUX_TEST_SRC([posix_acl_valid_with_ns], [ #include #include ],[ @@ -141,7 +146,12 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ int error; error = posix_acl_valid(user_ns, acl); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ + AC_MSG_CHECKING([whether posix_acl_valid() wants user namespace]) + ZFS_LINUX_TEST_RESULT([posix_acl_valid_with_ns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_VALID_WITH_NS, 1, [posix_acl_valid() wants user namespace]) @@ -155,9 +165,8 @@ dnl # 2.6.27 API change, dnl # Check if inode_operations contains the function permission dnl # and expects the nameidata structure to have been removed. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [ - AC_MSG_CHECKING([whether iops->permission() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION], [ + ZFS_LINUX_TEST_SRC([inode_operations_permission], [ #include int permission_fn(struct inode *inode, int mask) { return 0; } @@ -166,8 +175,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [ iops __attribute__ ((unused)) = { .permission = permission_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [ + AC_MSG_CHECKING([whether iops->permission() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_permission], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PERMISSION, 1, [iops->permission() exists]) ],[ @@ -180,9 +193,8 @@ dnl # 2.6.26 API change, dnl # Check if inode_operations contains the function permission dnl # and expects the nameidata structure to be passed. dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ - AC_MSG_CHECKING([whether iops->permission() wants nameidata]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ + ZFS_LINUX_TEST_SRC([inode_operations_permission_with_nameidata], [ #include #include @@ -193,8 +205,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ iops __attribute__ ((unused)) = { .permission = permission_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ + AC_MSG_CHECKING([whether iops->permission() wants nameidata]) + ZFS_LINUX_TEST_RESULT([inode_operations_permission_with_nameidata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PERMISSION, 1, [iops->permission() exists]) AC_DEFINE(HAVE_PERMISSION_WITH_NAMEIDATA, 1, @@ -208,9 +224,8 @@ dnl # dnl # 2.6.32 API change, dnl # Check if inode_operations contains the function check_acl dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [ - AC_MSG_CHECKING([whether iops->check_acl() exists]) - 
ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_check_acl], [ #include int check_acl_fn(struct inode *inode, int mask) { return 0; } @@ -219,8 +234,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [ iops __attribute__ ((unused)) = { .check_acl = check_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [ + AC_MSG_CHECKING([whether iops->check_acl() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_check_acl], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CHECK_ACL, 1, [iops->check_acl() exists]) ],[ @@ -232,9 +251,8 @@ dnl # dnl # 2.6.38 API change, dnl # The function check_acl gained a new parameter: flags dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ - AC_MSG_CHECKING([whether iops->check_acl() wants flags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ + ZFS_LINUX_TEST_SRC([inode_operations_check_acl_with_flags], [ #include int check_acl_fn(struct inode *inode, int mask, @@ -244,8 +262,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ iops __attribute__ ((unused)) = { .check_acl = check_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ + AC_MSG_CHECKING([whether iops->check_acl() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_check_acl_with_flags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CHECK_ACL, 1, [iops->check_acl() exists]) AC_DEFINE(HAVE_CHECK_ACL_WITH_FLAGS, 1, @@ -259,9 +281,8 @@ dnl # dnl # 3.1 API change, dnl # Check if inode_operations contains the function get_acl dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ - AC_MSG_CHECKING([whether iops->get_acl() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_get_acl], [ #include struct posix_acl *get_acl_fn(struct inode *inode, int type) @@ 
-271,8 +292,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ iops __attribute__ ((unused)) = { .get_acl = get_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ + AC_MSG_CHECKING([whether iops->get_acl() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_get_acl], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GET_ACL, 1, [iops->get_acl() exists]) ],[ @@ -284,20 +309,23 @@ dnl # dnl # 3.14 API change, dnl # Check if inode_operations contains the function set_acl dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ - AC_MSG_CHECKING([whether iops->set_acl() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_set_acl], [ #include - int set_acl_fn(struct inode *inode, struct posix_acl *acl, int type) - { return 0; } + int set_acl_fn(struct inode *inode, struct posix_acl *acl, + int type) { return 0; } static const struct inode_operations iops __attribute__ ((unused)) = { .set_acl = set_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ + AC_MSG_CHECKING([whether iops->set_acl() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) ],[ @@ -311,16 +339,79 @@ dnl # The kernel get_acl will now check cache before calling i_op->get_acl and dnl # do set_cached_acl after that, so i_op->get_acl don't need to do that dnl # anymore. 
dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE], [ + ZFS_LINUX_TEST_SRC([get_acl_handle_cache], [ + #include + ],[ + void *sentinel __attribute__ ((unused)) = + uncached_acl_sentinel(NULL); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE], [ AC_MSG_CHECKING([whether uncached_acl_sentinel() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include + ZFS_LINUX_TEST_RESULT([get_acl_handle_cache], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_GET_ACL_HANDLE_CACHE, 1, + [uncached_acl_sentinel() exists]) ],[ - void *sentinel __attribute__ ((unused)) = uncached_acl_sentinel(NULL); + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 4.16 kernel: check if struct posix_acl acl.a_refcount is a refcount_t. +dnl # It's an atomic_t on older kernels. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT], [ + ZFS_LINUX_TEST_SRC([acl_refcount], [ + #include + #include + #include ],[ + struct posix_acl acl; + refcount_t *r __attribute__ ((unused)) = &acl.a_refcount; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_ACL_HAS_REFCOUNT], [ + AC_MSG_CHECKING([whether posix_acl has refcount_t]) + ZFS_LINUX_TEST_RESULT([acl_refcount], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_GET_ACL_HANDLE_CACHE, 1, [uncached_acl_sentinel() exists]) + AC_DEFINE(HAVE_ACL_REFCOUNT, 1, [posix_acl has refcount_t]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL], [ + ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE + ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE + ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD + ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T + ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL + ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE + ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT +]) + +AC_DEFUN([ZFS_AC_KERNEL_ACL], [ + 
ZFS_AC_KERNEL_POSIX_ACL_RELEASE + ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE + ZFS_AC_KERNEL_POSIX_ACL_CHMOD + ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T + ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS + ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION + ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA + ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL + ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS + ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL + ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL + ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE + ZFS_AC_KERNEL_ACL_HAS_REFCOUNT +]) diff --git a/config/kernel-aio-fsync.m4 b/config/kernel-aio-fsync.m4 index 41b7a98a6b0..b4dbf29ba78 100644 --- a/config/kernel-aio-fsync.m4 +++ b/config/kernel-aio-fsync.m4 @@ -1,21 +1,23 @@ dnl # dnl # Linux 4.9-rc5+ ABI, removal of the .aio_fsync field dnl # -AC_DEFUN([ZFS_AC_KERNEL_AIO_FSYNC], [ - AC_MSG_CHECKING([whether fops->aio_fsync() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_AIO_FSYNC], [ + ZFS_LINUX_TEST_SRC([aio_fsync], [ #include static const struct file_operations fops __attribute__ ((unused)) = { .aio_fsync = NULL, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_AIO_FSYNC], [ + AC_MSG_CHECKING([whether fops->aio_fsync() exists]) + ZFS_LINUX_TEST_RESULT([aio_fsync], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FILE_AIO_FSYNC, 1, [fops->aio_fsync() exists]) ],[ AC_MSG_RESULT(no) ]) ]) - diff --git a/config/kernel-automount.m4 b/config/kernel-automount.m4 index 1ee4c168d43..93e14fa8d63 100644 --- a/config/kernel-automount.m4 +++ b/config/kernel-automount.m4 @@ -5,16 +5,19 @@ dnl # solution to handling automounts. Prior to this cifs/nfs clients dnl # which required automount support would abuse the follow_link() dnl # operation on directories for this purpose. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ - AC_MSG_CHECKING([whether dops->d_automount() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ + ZFS_LINUX_TEST_SRC([dentry_operations_d_automount], [ #include struct vfsmount *d_automount(struct path *p) { return NULL; } struct dentry_operations dops __attribute__ ((unused)) = { .d_automount = d_automount, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ + AC_MSG_CHECKING([whether dops->d_automount() exists]) + ZFS_LINUX_TEST_RESULT([dentry_operations_d_automount], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_AUTOMOUNT, 1, [dops->automount() exists]) ],[ diff --git a/config/kernel-bdev-logical-size.m4 b/config/kernel-bdev-logical-size.m4 index a6194577abb..0de9afd8888 100644 --- a/config/kernel-bdev-logical-size.m4 +++ b/config/kernel-bdev-logical-size.m4 @@ -5,21 +5,22 @@ dnl # it has been true for a while that there was no strict 1:1 mapping dnl # between physical sector size and logical block size this change makes dnl # it explicit. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE], [ - AC_MSG_CHECKING([whether bdev_logical_block_size() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_LOGICAL_BLOCK_SIZE], [ + ZFS_LINUX_TEST_SRC([bdev_logical_block_size], [ #include ],[ struct block_device *bdev = NULL; bdev_logical_block_size(bdev); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE], [ + AC_MSG_CHECKING([whether bdev_logical_block_size() is available]) + ZFS_LINUX_TEST_RESULT([bdev_logical_block_size], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BDEV_LOGICAL_BLOCK_SIZE, 1, - [bdev_logical_block_size() is available]) + [bdev_logical_block_size() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-bdev-physical-size.m4 b/config/kernel-bdev-physical-size.m4 index 77746ee9169..94d8172d3d7 100644 --- a/config/kernel-bdev-physical-size.m4 +++ b/config/kernel-bdev-physical-size.m4 @@ -19,21 +19,22 @@ dnl # dnl # Unfortunately, this interface isn't entirely reliable because dnl # drives are sometimes known to misreport this value. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE], [ - AC_MSG_CHECKING([whether bdev_physical_block_size() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_PHYSICAL_BLOCK_SIZE], [ + ZFS_LINUX_TEST_SRC([bdev_physical_block_size], [ #include ],[ struct block_device *bdev = NULL; bdev_physical_block_size(bdev); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE], [ + AC_MSG_CHECKING([whether bdev_physical_block_size() is available]) + ZFS_LINUX_TEST_RESULT([bdev_physical_block_size], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BDEV_PHYSICAL_BLOCK_SIZE, 1, - [bdev_physical_block_size() is available]) + [bdev_physical_block_size() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-bdi.m4 b/config/kernel-bdi.m4 index cb7479ee9c4..51516332a94 100644 --- a/config/kernel-bdi.m4 +++ b/config/kernel-bdi.m4 @@ -1,55 +1,81 @@ dnl # -dnl # 2.6.32 - 2.6.33, bdi_setup_and_register() is not exported. -dnl # 2.6.34 - 3.19, bdi_setup_and_register() takes 3 arguments. -dnl # 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. -dnl # 4.12 - x.y, super_setup_bdi_name() new interface. +dnl # Check available BDI interfaces. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDI], [ - AC_MSG_CHECKING([whether super_setup_bdi_name() exists]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDI], [ + ZFS_LINUX_TEST_SRC([super_setup_bdi_name], [ #include struct super_block sb; ], [ char *name = "bdi"; atomic_long_t zfs_bdi_seq; int error __attribute__((unused)) = - super_setup_bdi_name(&sb, "%.28s-%ld", name, atomic_long_inc_return(&zfs_bdi_seq)); - ], [super_setup_bdi_name], [fs/super.c], [ + super_setup_bdi_name(&sb, "%.28s-%ld", name, + atomic_long_inc_return(&zfs_bdi_seq)); + ]) + + ZFS_LINUX_TEST_SRC([bdi_setup_and_register], [ + #include + struct backing_dev_info bdi; + ], [ + char *name = "bdi"; + int error __attribute__((unused)) = + bdi_setup_and_register(&bdi, name); + ]) + + ZFS_LINUX_TEST_SRC([bdi_setup_and_register_3args], [ + #include + struct backing_dev_info bdi; + ], [ + char *name = "bdi"; + unsigned int cap = BDI_CAP_MAP_COPY; + int error __attribute__((unused)) = + bdi_setup_and_register(&bdi, name, cap); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDI], [ + dnl # + dnl # 4.12, super_setup_bdi_name() introduced. + dnl # + AC_MSG_CHECKING([whether super_setup_bdi_name() exists]) + ZFS_LINUX_TEST_RESULT_SYMBOL([super_setup_bdi_name], + [super_setup_bdi_name], [fs/super.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_SETUP_BDI_NAME, 1, [super_setup_bdi_name() exits]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. 
+ dnl # AC_MSG_CHECKING( [whether bdi_setup_and_register() wants 2 args]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name); - ], [bdi_setup_and_register], [mm/backing-dev.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL([bdi_setup_and_register], + [bdi_setup_and_register], [mm/backing-dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_BDI_SETUP_AND_REGISTER, 1, [bdi_setup_and_register() wants 2 args]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # 2.6.34 - 3.19, bdi_setup_and_register() + dnl # takes 3 arguments. + dnl # AC_MSG_CHECKING( [whether bdi_setup_and_register() wants 3 args]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - unsigned int cap = BDI_CAP_MAP_COPY; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name, cap); - ], [bdi_setup_and_register], [mm/backing-dev.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL( + [bdi_setup_and_register_3args], + [bdi_setup_and_register], [mm/backing-dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_3ARGS_BDI_SETUP_AND_REGISTER, 1, [bdi_setup_and_register() wants 3 args]) ], [ + dnl # + dnl # 2.6.32 - 2.6.33, bdi_setup_and_register() + dnl # is not exported. + dnl # AC_MSG_RESULT(no) ]) ]) diff --git a/config/kernel-bio-bvec-iter.m4 b/config/kernel-bio-bvec-iter.m4 index 64c989386b3..f9a99cee6b4 100644 --- a/config/kernel-bio-bvec-iter.m4 +++ b/config/kernel-bio-bvec-iter.m4 @@ -3,18 +3,21 @@ dnl # 3.14 API change, dnl # Immutable biovecs. A number of fields of struct bio are moved to dnl # struct bvec_iter. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [ - AC_MSG_CHECKING([whether bio has bi_iter]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER], [ + ZFS_LINUX_TEST_SRC([bio_bvec_iter], [ #include ],[ struct bio bio; bio.bi_iter.bi_sector = 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [ + AC_MSG_CHECKING([whether bio has bi_iter]) + ZFS_LINUX_TEST_RESULT([bio_bvec_iter], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_BVEC_ITER, 1, [bio has bi_iter]) ],[ AC_MSG_RESULT(no) ]) ]) - diff --git a/config/kernel-bio-end-io-t-args.m4 b/config/kernel-bio-end-io-t-args.m4 index 3c420cc0c30..80a1fbedad9 100644 --- a/config/kernel-bio-end-io-t-args.m4 +++ b/config/kernel-bio-end-io-t-args.m4 @@ -5,20 +5,21 @@ dnl # bio->bi_error. This also replaces bio->bi_flags value BIO_UPTODATE. dnl # Introduced by torvalds/linux@4246a0b63bd8f56a1469b12eafeb875b1041a451 dnl # ("block: add a bi_error field to struct bio"). dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ - AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [ + ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [ #include - void wanted_end_io(struct bio *bio) { return; } - bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io; - ],[ - ],[ + ], []) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ + AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg]) + ZFS_LINUX_TEST_RESULT([bio_end_io_t_args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_BIO_END_IO_T, 1, - [bio_end_io_t wants 1 arg]) - ],[ + [bio_end_io_t wants 1 arg]) + ], [ AC_MSG_RESULT(no) ]) ]) @@ -28,16 +29,19 @@ dnl # 4.13 API change dnl # The bio->bi_error field was replaced with bio->bi_status which is an dnl # enum which describes all possible error types. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [ - AC_MSG_CHECKING([whether bio->bi_status exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BI_STATUS], [ + ZFS_LINUX_TEST_SRC([bio_bi_status], [ #include - ],[ + ], [ struct bio bio __attribute__ ((unused)); blk_status_t status __attribute__ ((unused)) = BLK_STS_OK; - bio.bi_status = status; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [ + AC_MSG_CHECKING([whether bio->bi_status exists]) + ZFS_LINUX_TEST_RESULT([bio_bi_status], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_BI_STATUS, 1, [bio->bi_status exists]) ],[ diff --git a/config/kernel-bio-failfast.m4 b/config/kernel-bio-failfast.m4 index cfbec05238c..0c636f08cc8 100644 --- a/config/kernel-bio-failfast.m4 +++ b/config/kernel-bio-failfast.m4 @@ -3,37 +3,54 @@ dnl # Preferred interface for setting FAILFAST on a bio: dnl # 2.6.28-2.6.35: BIO_RW_FAILFAST_{DEV|TRANSPORT|DRIVER} dnl # >= 2.6.36: REQ_FAILFAST_{DEV|TRANSPORT|DRIVER} dnl # - -AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST_DTD], [ - AC_MSG_CHECKING([whether BIO_RW_FAILFAST_* are defined]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_FAILFAST_DTD], [ + ZFS_LINUX_TEST_SRC([bio_failfast_dtd], [ #include ],[ int flags __attribute__ ((unused)); flags = ((1 << BIO_RW_FAILFAST_DEV) | (1 << BIO_RW_FAILFAST_TRANSPORT) | (1 << BIO_RW_FAILFAST_DRIVER)); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST_DTD], [ + AC_MSG_CHECKING([whether BIO_RW_FAILFAST_* are defined]) + ZFS_LINUX_TEST_RESULT([bio_failfast_dtd], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_RW_FAILFAST_DTD, 1, - [BIO_RW_FAILFAST_* are defined]) + [BIO_RW_FAILFAST_* are defined]) ],[ AC_MSG_RESULT(no) ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_REQ_FAILFAST_MASK], [ - AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_REQ_FAILFAST_MASK], [ + ZFS_LINUX_TEST_SRC([bio_failfast_mask], [ #include ],[ int flags __attribute__ ((unused)); flags = REQ_FAILFAST_MASK; - ],[ + ]) +]) + 
+AC_DEFUN([ZFS_AC_KERNEL_REQ_FAILFAST_MASK], [ + AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined]) + ZFS_LINUX_TEST_RESULT([bio_failfast_mask], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REQ_FAILFAST_MASK, 1, - [REQ_FAILFAST_MASK is defined]) + [REQ_FAILFAST_MASK is defined]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_FAILFAST], [ + ZFS_AC_KERNEL_SRC_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_SRC_REQ_FAILFAST_MASK +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST], [ + ZFS_AC_KERNEL_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_REQ_FAILFAST_MASK +]) diff --git a/config/kernel-bio-op.m4 b/config/kernel-bio-op.m4 index 8299e490c2c..1f2d23791ae 100644 --- a/config/kernel-bio-op.m4 +++ b/config/kernel-bio-op.m4 @@ -5,13 +5,43 @@ dnl # The bio_op() helper was introduced as a replacement for explicitly dnl # checking the bio->bi_rw flags. The following checks are used to dnl # detect if a specific operation is supported. dnl # -AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ - AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [ + ZFS_LINUX_TEST_SRC([req_op_discard], [ #include ],[ int op __attribute__ ((unused)) = REQ_OP_DISCARD; + ]) + + ZFS_LINUX_TEST_SRC([req_op_secure_erase], [ + #include ],[ + int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; + ]) + + ZFS_LINUX_TEST_SRC([req_op_flush], [ + #include + ],[ + int op __attribute__ ((unused)) = REQ_OP_FLUSH; + ]) + + ZFS_LINUX_TEST_SRC([bio_bi_opf], [ + #include + ],[ + struct bio bio __attribute__ ((unused)); + bio.bi_opf = 0; + ]) + + ZFS_LINUX_TEST_SRC([bio_set_op_attrs], [ + #include + ],[ + struct bio *bio __attribute__ ((unused)) = NULL; + bio_set_op_attrs(bio, 0, 0); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ + AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) + ZFS_LINUX_TEST_RESULT([req_op_discard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REQ_OP_DISCARD, 1, [REQ_OP_DISCARD is defined]) @@ -22,11 +52,7 @@ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], 
[ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [ AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; - ],[ + ZFS_LINUX_TEST_RESULT([req_op_secure_erase], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1, [REQ_OP_SECURE_ERASE is defined]) @@ -38,14 +64,9 @@ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [ AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_FLUSH; - ],[ + ZFS_LINUX_TEST_RESULT([req_op_flush], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, - [REQ_OP_FLUSH is defined]) + AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, [REQ_OP_FLUSH is defined]) ],[ AC_MSG_RESULT(no) ]) @@ -53,12 +74,7 @@ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [ AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ AC_MSG_CHECKING([whether bio->bi_opf is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct bio bio __attribute__ ((unused)); - bio.bi_opf = 0; - ],[ + ZFS_LINUX_TEST_RESULT([bio_bi_opf], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined]) ],[ @@ -68,13 +84,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ AC_DEFUN([ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS], [ AC_MSG_CHECKING([whether bio_set_op_attrs is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct bio *bio __attribute__ ((unused)) = NULL; - - bio_set_op_attrs(bio, 0, 0); - ],[ + ZFS_LINUX_TEST_RESULT([bio_set_op_attrs], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_SET_OP_ATTRS, 1, [bio_set_op_attrs is available]) @@ -82,3 +92,11 @@ AC_DEFUN([ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS], [ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_OPS], [ + ZFS_AC_KERNEL_REQ_OP_DISCARD + ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE + ZFS_AC_KERNEL_REQ_OP_FLUSH + ZFS_AC_KERNEL_BIO_BI_OPF + ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS +]) diff --git a/config/kernel-bio-rw-barrier.m4 
b/config/kernel-bio-rw-barrier.m4 index bcf0f7ea00b..f667d48844f 100644 --- a/config/kernel-bio-rw-barrier.m4 +++ b/config/kernel-bio-rw-barrier.m4 @@ -3,20 +3,25 @@ dnl # Interface for issuing a discard bio: dnl # 2.6.28-2.6.35: BIO_RW_BARRIER dnl # 2.6.36-3.x: REQ_BARRIER dnl # - +dnl # dnl # Since REQ_BARRIER is a preprocessor definition, there is no need for an dnl # autotools check for it. Also, REQ_BARRIER existed in the request layer dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the dnl # request layer and bio layer flags, so it would be wrong to assume that dnl # the APIs are mutually exclusive contrary to the typical case. -AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [ - AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined]) - ZFS_LINUX_TRY_COMPILE([ +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_BARRIER], [ + ZFS_LINUX_TEST_SRC([bio_rw_barrier], [ #include ],[ int flags __attribute__ ((unused)); flags = BIO_RW_BARRIER; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [ + AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined]) + ZFS_LINUX_TEST_RESULT([bio_rw_barrier], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_RW_BARRIER, 1, [BIO_RW_BARRIER is defined]) ],[ diff --git a/config/kernel-bio-rw-discard.m4 b/config/kernel-bio-rw-discard.m4 index 0554b9a9dae..34a89279c20 100644 --- a/config/kernel-bio-rw-discard.m4 +++ b/config/kernel-bio-rw-discard.m4 @@ -3,20 +3,25 @@ dnl # Interface for issuing a discard bio: dnl # 2.6.28-2.6.35: BIO_RW_DISCARD dnl # 2.6.36-3.x: REQ_DISCARD dnl # - +dnl # dnl # Since REQ_DISCARD is a preprocessor definition, there is no need for an dnl # autotools check for it. Also, REQ_DISCARD existed in the request layer dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the dnl # request layer and bio layer flags, so it would be wrong to assume that dnl # the APIs are mutually exclusive contrary to the typical case. 
-AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [ - AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined]) - ZFS_LINUX_TRY_COMPILE([ +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_DISCARD], [ + ZFS_LINUX_TEST_SRC([bio_rw_discard], [ #include ],[ int flags __attribute__ ((unused)); flags = BIO_RW_DISCARD; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [ + AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined]) + ZFS_LINUX_TEST_RESULT([bio_rw_discard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_RW_DISCARD, 1, [BIO_RW_DISCARD is defined]) ],[ diff --git a/config/kernel-bio_set_dev.m4 b/config/kernel-bio_set_dev.m4 index 71d47a89309..b8e13f35ac0 100644 --- a/config/kernel-bio_set_dev.m4 +++ b/config/kernel-bio_set_dev.m4 @@ -3,51 +3,38 @@ dnl # Linux 4.14 API, dnl # dnl # The bio_set_dev() helper macro was introduced as part of the transition dnl # to have struct gendisk in struct bio. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV_MACRO], [ - AC_MSG_CHECKING([whether bio_set_dev() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - struct block_device *bdev = NULL; - struct bio *bio = NULL; - bio_set_dev(bio, bdev); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # Linux 5.0 API, dnl # dnl # The bio_set_dev() helper macro was updated to internally depend on dnl # bio_associate_blkg() symbol which is exported GPL-only. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV_GPL_ONLY], [ - AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV], [ + ZFS_LINUX_TEST_SRC([bio_set_dev], [ #include #include - MODULE_LICENSE("$ZFS_META_LICENSE"); ],[ struct block_device *bdev = NULL; struct bio *bio = NULL; bio_set_dev(bio, bdev); - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, - [bio_set_dev() GPL-only]) - ]) + ], [], [$ZFS_META_LICENSE]) ]) AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [ - ZFS_AC_KERNEL_BIO_SET_DEV_MACRO - ZFS_AC_KERNEL_BIO_SET_DEV_GPL_ONLY + AC_MSG_CHECKING([whether bio_set_dev() is available]) + ZFS_LINUX_TEST_RESULT([bio_set_dev], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() is available]) + + AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) + ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [ + AC_MSG_RESULT(no) + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, + [bio_set_dev() GPL-only]) + ]) + ],[ + AC_MSG_RESULT(no) + ]) ]) diff --git a/config/kernel-blk-queue-bdi.m4 b/config/kernel-blk-queue-bdi.m4 index 816471166a5..28241c4944c 100644 --- a/config/kernel-blk-queue-bdi.m4 +++ b/config/kernel-blk-queue-bdi.m4 @@ -2,15 +2,19 @@ dnl # dnl # 2.6.32 - 4.11, statically allocated bdi in request_queue dnl # 4.12 - x.y, dynamically allocated bdi in request_queue dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [ - AC_MSG_CHECKING([whether blk_queue bdi is dynamic]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI], [ + ZFS_LINUX_TEST_SRC([blk_queue_bdi], [ #include ],[ struct request_queue q; struct backing_dev_info bdi; q.backing_dev_info = &bdi; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [ + AC_MSG_CHECKING([whether blk_queue bdi is dynamic]) + ZFS_LINUX_TEST_RESULT([blk_queue_bdi], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1, [blk queue backing_dev_info is dynamic]) diff 
--git a/config/kernel-blk-queue-discard.m4 b/config/kernel-blk-queue-discard.m4 index addbba81447..85a29356def 100644 --- a/config/kernel-blk-queue-discard.m4 +++ b/config/kernel-blk-queue-discard.m4 @@ -2,16 +2,19 @@ dnl # dnl # 2.6.32 - 4.x API, dnl # blk_queue_discard() dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ - AC_MSG_CHECKING([whether blk_queue_discard() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [ + ZFS_LINUX_TEST_SRC([blk_queue_discard], [ #include ],[ struct request_queue *q __attribute__ ((unused)) = NULL; int value __attribute__ ((unused)); - value = blk_queue_discard(q); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ + AC_MSG_CHECKING([whether blk_queue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_discard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1, [blk_queue_discard() is available]) @@ -30,16 +33,27 @@ dnl # dnl # 2.6.x - 2.6.35 API, dnl # Unsupported by kernel dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ - AC_MSG_CHECKING([whether blk_queue_secure_erase() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ + ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [ #include ],[ struct request_queue *q __attribute__ ((unused)) = NULL; int value __attribute__ ((unused)); - value = blk_queue_secure_erase(q); + ]) + + ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [ + #include ],[ + struct request_queue *q __attribute__ ((unused)) = NULL; + int value __attribute__ ((unused)); + value = blk_queue_secdiscard(q); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ + AC_MSG_CHECKING([whether blk_queue_secure_erase() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1, [blk_queue_secure_erase() is available]) @@ -47,14 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether 
blk_queue_secdiscard() is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct request_queue *q __attribute__ ((unused)) = NULL; - int value __attribute__ ((unused)); - - value = blk_queue_secdiscard(q); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1, [blk_queue_secdiscard() is available]) diff --git a/config/kernel-blk-queue-flags.m4 b/config/kernel-blk-queue-flags.m4 index b570245c74d..9d4dfc159e8 100644 --- a/config/kernel-blk-queue-flags.m4 +++ b/config/kernel-blk-queue-flags.m4 @@ -3,36 +3,54 @@ dnl # API change dnl # https://github.com/torvalds/linux/commit/8814ce8 dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear dnl # - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [ - AC_MSG_CHECKING([whether blk_queue_flag_set() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [ + ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [ #include #include ],[ struct request_queue *q = NULL; blk_queue_flag_set(0, q); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [ + AC_MSG_CHECKING([whether blk_queue_flag_set() exists]) + ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1, [blk_queue_flag_set() exists]) + AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1, + [blk_queue_flag_set() exists]) ],[ AC_MSG_RESULT(no) ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [ - AC_MSG_CHECKING([whether blk_queue_flag_clear() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [ + ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [ #include #include ],[ struct request_queue *q = NULL; blk_queue_flag_clear(0, q); - ],[ + ]) +]) +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [ + AC_MSG_CHECKING([whether blk_queue_flag_clear() exists]) + ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1, [blk_queue_flag_clear() exists]) + 
AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1, + [blk_queue_flag_clear() exists]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAGS], [ + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAGS], [ + ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET + ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR +]) diff --git a/config/kernel-blk-queue-flush.m4 b/config/kernel-blk-queue-flush.m4 index 1baab83a4e8..b546d940018 100644 --- a/config/kernel-blk-queue-flush.m4 +++ b/config/kernel-blk-queue-flush.m4 @@ -9,35 +9,37 @@ dnl # there we implement our own compatibility function, otherwise dnl # we use the function. The hope is that long term this function dnl # will be opened up. dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ - AC_MSG_CHECKING([whether blk_queue_flush() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [ + ZFS_LINUX_TEST_SRC([blk_queue_flush], [ #include - ],[ + ], [ struct request_queue *q = NULL; (void) blk_queue_flush(q, REQ_FLUSH); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE]) + + ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [ + #include + #include + ], [ + struct request_queue *q = NULL; + blk_queue_write_cache(q, true, true); + ], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ + AC_MSG_CHECKING([whether blk_queue_flush() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_flush], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1, - [blk_queue_flush() is available]) + [blk_queue_flush() is available]) AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct request_queue *q = NULL; - (void) blk_queue_flush(q, REQ_FLUSH); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [ AC_MSG_RESULT(no) ],[ 
AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1, - [blk_queue_flush() is GPL-only]) + [blk_queue_flush() is GPL-only]) ]) ],[ AC_MSG_RESULT(no) @@ -48,38 +50,20 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ dnl # Replace blk_queue_flush with blk_queue_write_cache dnl # AC_MSG_CHECKING([whether blk_queue_write_cache() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - - ],[ - struct request_queue *q = NULL; - blk_queue_write_cache(q, true, true); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1, - [blk_queue_write_cache() exists]) + [blk_queue_write_cache() exists]) AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct request_queue *q = NULL; - blk_queue_write_cache(q, true, true); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1, - [blk_queue_write_cache() is GPL-only]) + [blk_queue_write_cache() is GPL-only]) ]) ],[ AC_MSG_RESULT(no) ]) - - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-blk-queue-max-hw-sectors.m4 b/config/kernel-blk-queue-max-hw-sectors.m4 index 2f5515dc6b7..7387f84de74 100644 --- a/config/kernel-blk-queue-max-hw-sectors.m4 +++ b/config/kernel-blk-queue-max-hw-sectors.m4 @@ -2,21 +2,22 @@ dnl # dnl # 2.6.34 API change dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors(). 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [ - AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS], [ + ZFS_LINUX_TEST_SRC([blk_queue_max_hw_sectors], [ #include - ],[ + ], [ struct request_queue *q = NULL; (void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [ + AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_max_hw_sectors], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_MAX_HW_SECTORS, 1, - [blk_queue_max_hw_sectors() is available]) + [blk_queue_max_hw_sectors() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-blk-queue-max-segments.m4 b/config/kernel-blk-queue-max-segments.m4 index b2a40423a5c..1e4092df9ac 100644 --- a/config/kernel-blk-queue-max-segments.m4 +++ b/config/kernel-blk-queue-max-segments.m4 @@ -3,21 +3,22 @@ dnl # 2.6.34 API change dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments() dnl # and blk_queue_max_phys_segments(). 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ - AC_MSG_CHECKING([whether blk_queue_max_segments() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS], [ + ZFS_LINUX_TEST_SRC([blk_queue_max_segments], [ #include - ],[ + ], [ struct request_queue *q = NULL; (void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ + AC_MSG_CHECKING([whether blk_queue_max_segments() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_max_segments], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1, - [blk_queue_max_segments() is available]) - ],[ + [blk_queue_max_segments() is available]) + ], [ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-blk-queue-unplug.m4 b/config/kernel-blk-queue-unplug.m4 index 075fbccd1a5..f5d1814b83a 100644 --- a/config/kernel-blk-queue-unplug.m4 +++ b/config/kernel-blk-queue-unplug.m4 @@ -2,43 +2,53 @@ dnl # dnl # 2.6.32-2.6.35 API - The BIO_RW_UNPLUG enum can be used as a hint dnl # to unplug the queue. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG], [ - AC_MSG_CHECKING([whether the BIO_RW_UNPLUG enum is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_UNPLUG], [ + ZFS_LINUX_TEST_SRC([blk_queue_bio_rw_unplug], [ #include ],[ - extern enum bio_rw_flags rw; + enum bio_rw_flags rw __attribute__ ((unused)) = BIO_RW_UNPLUG; + ]) +]) - rw = BIO_RW_UNPLUG; - ],[ +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_UNPLUG], [ + AC_MSG_CHECKING([whether the BIO_RW_UNPLUG enum is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_bio_rw_unplug], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG, 1, - [BIO_RW_UNPLUG is available]) + [BIO_RW_UNPLUG is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG], [ - AC_MSG_CHECKING([whether struct blk_plug is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_PLUG], [ + ZFS_LINUX_TEST_SRC([blk_plug], [ #include ],[ - struct blk_plug plug; + struct blk_plug plug __attribute__ ((unused)); blk_start_plug(&plug); blk_finish_plug(&plug); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_PLUG], [ + AC_MSG_CHECKING([whether struct blk_plug is available]) + ZFS_LINUX_TEST_RESULT([blk_plug], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BLK_PLUG, 1, - [struct blk_plug is available]) + [struct blk_plug is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG], [ + ZFS_AC_KERNEL_SRC_BIO_RW_UNPLUG + ZFS_AC_KERNEL_SRC_BLK_PLUG +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PLUG], [ + ZFS_AC_KERNEL_BIO_RW_UNPLUG + ZFS_AC_KERNEL_BLK_PLUG ]) diff --git a/config/kernel-blkdev-get-by-path.m4 b/config/kernel-blkdev-get-by-path.m4 index 40ecc06b6c9..fb0cea6af59 100644 --- a/config/kernel-blkdev-get-by-path.m4 +++ 
b/config/kernel-blkdev-get-by-path.m4 @@ -3,16 +3,21 @@ dnl # 2.6.38 API change dnl # open_bdev_exclusive() changed to blkdev_get_by_path() dnl # close_bdev_exclusive() changed to blkdev_put() dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], - [AC_MSG_CHECKING([whether blkdev_get_by_path() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [ + ZFS_LINUX_TEST_SRC([blkdev_get_by_path], [ #include ], [ blkdev_get_by_path(NULL, 0, NULL); - ], [blkdev_get_by_path], [fs/block_dev.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [ + AC_MSG_CHECKING([whether blkdev_get_by_path() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([blkdev_get_by_path], + [blkdev_get_by_path], [fs/block_dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH, 1, - [blkdev_get_by_path() is available]) + [blkdev_get_by_path() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-blkdev-reread-part.m4 b/config/kernel-blkdev-reread-part.m4 index 5664769a309..1bf1e7c3a24 100644 --- a/config/kernel-blkdev-reread-part.m4 +++ b/config/kernel-blkdev-reread-part.m4 @@ -2,16 +2,20 @@ dnl # dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the dnl # 3.10.0 CentOS 7.x enterprise kernels. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [ - AC_MSG_CHECKING([whether blkdev_reread_part() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART], [ + ZFS_LINUX_TEST_SRC([blkdev_reread_part], [ #include ], [ struct block_device *bdev = NULL; int error; error = blkdev_reread_part(bdev); - ], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [ + AC_MSG_CHECKING([whether blkdev_reread_part() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_reread_part], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLKDEV_REREAD_PART, 1, [blkdev_reread_part() is available]) diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4 index 5f2811c1534..c3d5eec5294 100644 --- a/config/kernel-block-device-operations.m4 +++ b/config/kernel-block-device-operations.m4 @@ -1,11 +1,8 @@ dnl # dnl # 2.6.38 API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ - AC_MSG_CHECKING([whether bops->check_events() exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ + ZFS_LINUX_TEST_SRC([block_device_operations_check_events], [ #include unsigned int blk_check_events(struct gendisk *disk, @@ -15,25 +12,25 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ bops __attribute__ ((unused)) = { .check_events = blk_check_events, }; - ],[ - ],[ + ], [], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ + AC_MSG_CHECKING([whether bops->check_events() exists]) + ZFS_LINUX_TEST_RESULT([block_device_operations_check_events], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1, [bops->check_events() exists]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # dnl # 3.10.x API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ - 
AC_MSG_CHECKING([whether bops->release() is void]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ + ZFS_LINUX_TEST_SRC([block_device_operations_release_void], [ #include void blk_release(struct gendisk *g, fmode_t mode) { return; } @@ -45,13 +42,26 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ .ioctl = NULL, .compat_ioctl = NULL, }; - ],[ - ],[ - AC_MSG_RESULT(void) + ], [], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ + AC_MSG_CHECKING([whether bops->release() is void]) + ZFS_LINUX_TEST_RESULT([block_device_operations_release_void], [ + AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1, [bops->release() returns void]) ],[ - AC_MSG_RESULT(int) + AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [ + ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS + ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS], [ + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ]) diff --git a/config/kernel-clear-inode.m4 b/config/kernel-clear-inode.m4 index 8d880fcd8f5..3f454d7ec0d 100644 --- a/config/kernel-clear-inode.m4 +++ b/config/kernel-clear-inode.m4 @@ -19,13 +19,18 @@ dnl # Therefore, to ensure we have the correct API we only allow the dnl # clear_inode() compatibility code to be defined iff the evict_inode() dnl # functionality is also detected. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CLEAR_INODE], - [AC_MSG_CHECKING([whether clear_inode() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CLEAR_INODE], [ + ZFS_LINUX_TEST_SRC([clear_inode], [ #include ], [ clear_inode(NULL); - ], [clear_inode], [fs/inode.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CLEAR_INODE], [ + AC_MSG_CHECKING([whether clear_inode() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([clear_inode], + [clear_inode], [fs/inode.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CLEAR_INODE, 1, [clear_inode() is available]) ], [ diff --git a/config/kernel-commit-metadata.m4 b/config/kernel-commit-metadata.m4 index b66a16fd212..9bc3b6622bb 100644 --- a/config/kernel-commit-metadata.m4 +++ b/config/kernel-commit-metadata.m4 @@ -4,19 +4,22 @@ dnl # Added eops->commit_metadata() callback to allow the underlying dnl # filesystem to determine the most efficient way to commit the inode. dnl # Prior to this the nfs server would issue an explicit fsync(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_COMMIT_METADATA], [ - AC_MSG_CHECKING([whether eops->commit_metadata() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_COMMIT_METADATA], [ + ZFS_LINUX_TEST_SRC([export_operations_commit_metadata], [ #include int commit_metadata(struct inode *inode) { return 0; } static struct export_operations eops __attribute__ ((unused))={ .commit_metadata = commit_metadata, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_COMMIT_METADATA], [ + AC_MSG_CHECKING([whether eops->commit_metadata() exists]) + ZFS_LINUX_TEST_RESULT([export_operations_commit_metadata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_COMMIT_METADATA, 1, - [eops->commit_metadata() exists]) + [eops->commit_metadata() exists]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-config-defined.m4 b/config/kernel-config-defined.m4 new file mode 100644 index 00000000000..0ee4231cc2d --- /dev/null +++ b/config/kernel-config-defined.m4 @@ -0,0 +1,183 @@ +dnl # +dnl # Certain kernel build options are not 
supported. These must be +dnl # detected at configure time and cause a build failure. Otherwise +dnl # modules may be successfully built that behave incorrectly. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEFINED], [ + AS_IF([test "x$cross_compiling" != xyes], [ + AC_RUN_IFELSE([ + AC_LANG_PROGRAM([ + #include "$LINUX/include/linux/license.h" + ], [ + return !license_is_gpl_compatible( + "$ZFS_META_LICENSE"); + ]) + ], [ + AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1], + [Define to 1 if GPL-only symbols can be used]) + ], [ + ]) + ]) + + ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE + ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC + ZFS_AC_KERNEL_SRC_CONFIG_TRIM_UNUSED_KSYMS + ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE + ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_DEFLATE + + AC_MSG_CHECKING([for kernel config option compatibility]) + ZFS_LINUX_TEST_COMPILE_ALL([config]) + AC_MSG_RESULT([done]) + + ZFS_AC_KERNEL_CONFIG_THREAD_SIZE + ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC + ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS + ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE + ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE +]) + +dnl # +dnl # Check configured THREAD_SIZE +dnl # +dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64 +dnl # the default thread stack size was increased to 16K from 8K. Therefore, +dnl # on newer kernels and some architectures stack usage optimizations can be +dnl # conditionally applied to improve performance without negatively impacting +dnl # stability. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE], [ + ZFS_LINUX_TEST_SRC([config_thread_size], [ + #include + ],[ + #if (THREAD_SIZE < 16384) + #error "THREAD_SIZE is less than 16K" + #endif + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [ + AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks]) + ZFS_LINUX_TEST_RESULT([config_thread_size], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # Check CONFIG_DEBUG_LOCK_ALLOC +dnl # +dnl # This is typically only set for debug kernels because it comes with +dnl # a performance penalty. However, when it is set it maps the non-GPL +dnl # symbol mutex_lock() to the GPL-only mutex_lock_nested() symbol. +dnl # This will cause a failure at link time which we'd rather know about +dnl # at compile time. +dnl # +dnl # Since we plan to pursue making mutex_lock_nested() a non-GPL symbol +dnl # with the upstream community we add a check to detect this case. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC], [ + ZFS_LINUX_TEST_SRC([config_debug_lock_alloc], [ + #include + ],[ + struct mutex lock; + + mutex_init(&lock); + mutex_lock(&lock); + mutex_unlock(&lock); + ], [], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [ + AC_MSG_CHECKING([whether mutex_lock() is GPL-only]) + ZFS_LINUX_TEST_RESULT([config_debug_lock_alloc], [ + AC_MSG_RESULT(no) + ],[ + AC_MSG_RESULT(yes) + AC_MSG_ERROR([ + *** Kernel built with CONFIG_DEBUG_LOCK_ALLOC which is incompatible + *** with the CDDL license and will prevent the module linking stage + *** from succeeding. You must rebuild your kernel without this + *** option enabled.]) + ]) +]) + +dnl # +dnl # Check CONFIG_TRIM_UNUSED_KSYMS +dnl # +dnl # Verify the kernel has CONFIG_TRIM_UNUSED_KSYMS disabled. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_TRIM_UNUSED_KSYMS], [ + ZFS_LINUX_TEST_SRC([config_trim_unusued_ksyms], [ + #if defined(CONFIG_TRIM_UNUSED_KSYMS) + #error CONFIG_TRIM_UNUSED_KSYMS not defined + #endif + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS], [ + AC_MSG_CHECKING([whether CONFIG_TRIM_UNUSED_KSYM is disabled]) + ZFS_LINUX_TEST_RESULT([config_trim_unusued_ksyms], [ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AS_IF([test "x$enable_linux_builtin" != xyes], [ + AC_MSG_ERROR([ + *** This kernel has unused symbols trimming enabled, please disable. + *** Rebuild the kernel with CONFIG_TRIM_UNUSED_KSYMS=n set.]) + ]) + ]) +]) + +dnl # +dnl # Check CONFIG_ZLIB_INFLATE +dnl # +dnl # Verify the kernel has CONFIG_ZLIB_INFLATE support enabled. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE], [ + ZFS_LINUX_TEST_SRC([config_zlib_inflate], [ + #if !defined(CONFIG_ZLIB_INFLATE) && \ + !defined(CONFIG_ZLIB_INFLATE_MODULE) + #error CONFIG_ZLIB_INFLATE not defined + #endif + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE], [ + AC_MSG_CHECKING([whether CONFIG_ZLIB_INFLATE is defined]) + ZFS_LINUX_TEST_RESULT([config_zlib_inflate], [ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([ + *** This kernel does not include the required zlib inflate support. + *** Rebuild the kernel with CONFIG_ZLIB_INFLATE=y|m set.]) + ]) +]) + +dnl # +dnl # Check CONFIG_ZLIB_DEFLATE +dnl # +dnl # Verify the kernel has CONFIG_ZLIB_DEFLATE support enabled. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_DEFLATE], [ + ZFS_LINUX_TEST_SRC([config_zlib_deflate], [ + #if !defined(CONFIG_ZLIB_DEFLATE) && \ + !defined(CONFIG_ZLIB_DEFLATE_MODULE) + #error CONFIG_ZLIB_DEFLATE not defined + #endif + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE], [ + AC_MSG_CHECKING([whether CONFIG_ZLIB_DEFLATE is defined]) + ZFS_LINUX_TEST_RESULT([config_zlib_deflate], [ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([ + *** This kernel does not include the required zlib deflate support. + *** Rebuild the kernel with CONFIG_ZLIB_DEFLATE=y|m set.]) + ]) +]) diff --git a/config/kernel-create-nameidata.m4 b/config/kernel-create-nameidata.m4 index d4c155c57fc..c43ca5b8569 100644 --- a/config/kernel-create-nameidata.m4 +++ b/config/kernel-create-nameidata.m4 @@ -1,9 +1,8 @@ dnl # dnl # 3.6 API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [ - AC_MSG_CHECKING([whether iops->create() passes nameidata]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_NAMEIDATA], [ + ZFS_LINUX_TEST_SRC([create_nameidata], [ #include #include @@ -19,11 +18,15 @@ AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [ iops __attribute__ ((unused)) = { .create = inode_create, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [ + AC_MSG_CHECKING([whether iops->create() passes nameidata]) + ZFS_LINUX_TEST_RESULT([create_nameidata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CREATE_NAMEIDATA, 1, - [iops->create() passes nameidata]) + [iops->create() passes nameidata]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-ctl-table-name.m4 b/config/kernel-ctl-table-name.m4 index 3ce499968ff..16f2ad54451 100644 --- a/config/kernel-ctl-table-name.m4 +++ b/config/kernel-ctl-table-name.m4 @@ -2,14 +2,18 @@ dnl # dnl # 2.6.33 API change, dnl # Removed .ctl_name from struct ctl_table. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CTL_NAME], [ - AC_MSG_CHECKING([whether struct ctl_table has ctl_name]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CTL_NAME], [ + ZFS_LINUX_TEST_SRC([ctl_name], [ #include ],[ struct ctl_table ctl __attribute__ ((unused)); ctl.ctl_name = 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CTL_NAME], [ + AC_MSG_CHECKING([whether struct ctl_table has ctl_name]) + ZFS_LINUX_TEST_RESULT([ctl_name], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CTL_NAME, 1, [struct ctl_table has ctl_name]) ],[ diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4 index c7d5c9b5200..3ceb5f63efa 100644 --- a/config/kernel-current-time.m4 +++ b/config/kernel-current-time.m4 @@ -2,14 +2,19 @@ dnl # dnl # 4.9, current_time() added dnl # 4.18, return type changed from timespec to timespec64 dnl # -AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], - [AC_MSG_CHECKING([whether current_time() exists]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [ + ZFS_LINUX_TEST_SRC([current_time], [ #include ], [ struct inode ip __attribute__ ((unused)); ip.i_atime = current_time(&ip); - ], [current_time], [fs/inode.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], [ + AC_MSG_CHECKING([whether current_time() exists]) + ZFS_LINUX_TEST_RESULT_SYMBOL([current_time], + [current_time], [fs/inode.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists]) ], [ diff --git a/config/kernel-current_bio_tail.m4 b/config/kernel-current_bio_tail.m4 index b72f21e8a35..9dfc3e6e0da 100644 --- a/config/kernel-current_bio_tail.m4 +++ b/config/kernel-current_bio_tail.m4 @@ -4,30 +4,36 @@ dnl # current->bio_tail and current->bio_list were struct bio pointers prior to dnl # Linux 2.6.34. They were refactored into a struct bio_list pointer called dnl # current->bio_list in Linux 2.6.34. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [ - AC_MSG_CHECKING([whether current->bio_tail exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL], [ + ZFS_LINUX_TEST_SRC([current_bio_tail], [ #include - ],[ + ], [ current->bio_tail = (struct bio **) NULL; - ],[ + ]) + + ZFS_LINUX_TEST_SRC([current_bio_list], [ + #include + ], [ + current->bio_list = (struct bio_list *) NULL; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [ + AC_MSG_CHECKING([whether current->bio_tail exists]) + ZFS_LINUX_TEST_RESULT([current_bio_tail], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CURRENT_BIO_TAIL, 1, [current->bio_tail exists]) ],[ AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether current->bio_list exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - current->bio_list = (struct bio_list *) NULL; - ],[ + ZFS_LINUX_TEST_RESULT([current_bio_list], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CURRENT_BIO_LIST, 1, [current->bio_list exists]) ],[ - AC_MSG_ERROR(no - Please file a bug report at - https://github.com/zfsonlinux/zfs/issues/new) + ZFS_LINUX_TEST_ERROR([bio_list]) ]) ]) ]) diff --git a/config/kernel-d-make-root.m4 b/config/kernel-d-make-root.m4 deleted file mode 100644 index 9c2b73dcbf5..00000000000 --- a/config/kernel-d-make-root.m4 +++ /dev/null @@ -1,17 +0,0 @@ -dnl # -dnl # 3.4.0 API change -dnl # Added d_make_root() to replace previous d_alloc_root() function. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_D_MAKE_ROOT], - [AC_MSG_CHECKING([whether d_make_root() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - ], [ - d_make_root(NULL); - ], [d_make_root], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_MAKE_ROOT, 1, [d_make_root() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-d-obtain-alias.m4 b/config/kernel-d-obtain-alias.m4 deleted file mode 100644 index 2b4b11eccc1..00000000000 --- a/config/kernel-d-obtain-alias.m4 +++ /dev/null @@ -1,18 +0,0 @@ -dnl # -dnl # 2.6.28 API change -dnl # Added d_obtain_alias() helper function. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_D_OBTAIN_ALIAS], - [AC_MSG_CHECKING([whether d_obtain_alias() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - ], [ - d_obtain_alias(NULL); - ], [d_obtain_alias], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1, - [d_obtain_alias() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-d-prune-aliases.m4 b/config/kernel-d-prune-aliases.m4 deleted file mode 100644 index d9c521b1d4c..00000000000 --- a/config/kernel-d-prune-aliases.m4 +++ /dev/null @@ -1,19 +0,0 @@ -dnl # -dnl # 2.6.12 API change -dnl # d_prune_aliases() helper function available. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES], - [AC_MSG_CHECKING([whether d_prune_aliases() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - ], [ - struct inode *ip = NULL; - d_prune_aliases(ip); - ], [d_prune_aliases], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1, - [d_prune_aliases() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-declare-event-class.m4 b/config/kernel-declare-event-class.m4 index 7867d751749..6c78ee858d7 100644 --- a/config/kernel-declare-event-class.m4 +++ b/config/kernel-declare-event-class.m4 @@ -2,13 +2,10 @@ dnl # dnl # Ensure the DECLARE_EVENT_CLASS macro is available to non-GPL modules. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [ - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-I\$(src)" - AC_MSG_CHECKING([whether DECLARE_EVENT_CLASS() is available]) ZFS_LINUX_TRY_COMPILE_HEADER([ #include - MODULE_LICENSE(ZFS_META_LICENSE); + MODULE_LICENSE("$ZFS_META_LICENSE"); #define CREATE_TRACE_POINTS #include "conftest.h" @@ -18,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [ ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_DECLARE_EVENT_CLASS, 1, - [DECLARE_EVENT_CLASS() is available]) + [DECLARE_EVENT_CLASS() is available]) ],[ AC_MSG_RESULT(no) ],[ @@ -55,5 +52,4 @@ AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [ #define TRACE_INCLUDE_FILE conftest #include ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-dentry-operations.m4 b/config/kernel-dentry-operations.m4 index 2cd2553010d..2dfd2ac554c 100644 --- a/config/kernel-dentry-operations.m4 +++ b/config/kernel-dentry-operations.m4 @@ -1,9 +1,103 @@ +dnl # +dnl # 3.4.0 API change +dnl # Added d_make_root() to replace previous d_alloc_root() function. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_MAKE_ROOT], [ + ZFS_LINUX_TEST_SRC([d_make_root], [ + #include + ], [ + d_make_root(NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_D_MAKE_ROOT], [ + AC_MSG_CHECKING([whether d_make_root() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([d_make_root], + [d_make_root], [fs/dcache.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_MAKE_ROOT, 1, [d_make_root() is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 2.6.28 API change +dnl # Added d_obtain_alias() helper function. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS], [ + ZFS_LINUX_TEST_SRC([d_obtain_alias], [ + #include + ], [ + d_obtain_alias(NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_D_OBTAIN_ALIAS], [ + AC_MSG_CHECKING([whether d_obtain_alias() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([d_obtain_alias], + [d_obtain_alias], [fs/dcache.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1, + [d_obtain_alias() is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 2.6.12 API change +dnl # d_prune_aliases() helper function available. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES], [ + ZFS_LINUX_TEST_SRC([d_prune_aliases], [ + #include + ], [ + struct inode *ip = NULL; + d_prune_aliases(ip); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES], [ + AC_MSG_CHECKING([whether d_prune_aliases() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([d_prune_aliases], + [d_prune_aliases], [fs/dcache.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1, + [d_prune_aliases() is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 2.6.38 API change +dnl # Added d_set_d_op() helper function. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [ + ZFS_LINUX_TEST_SRC([d_set_d_op], [ + #include + ], [ + d_set_d_op(NULL, NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [ + AC_MSG_CHECKING([whether d_set_d_op() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op], + [d_set_d_op], [fs/dcache.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_D_SET_D_OP, 1, [d_set_d_op() is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + dnl # dnl # 3.6 API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [ - AC_MSG_CHECKING([whether dops->d_revalidate() takes struct nameidata]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA], [ + ZFS_LINUX_TEST_SRC([dentry_operations_revalidate], [ #include #include @@ -14,11 +108,15 @@ AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [ dops __attribute__ ((unused)) = { .d_revalidate = revalidate, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [ + AC_MSG_CHECKING([whether dops->d_revalidate() takes struct nameidata]) + ZFS_LINUX_TEST_RESULT([dentry_operations_revalidate], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_D_REVALIDATE_NAMEIDATA, 1, - [dops->d_revalidate() operation takes nameidata]) + [dops->d_revalidate() operation takes nameidata]) ],[ AC_MSG_RESULT(no) ]) @@ -28,9 +126,8 @@ dnl # dnl # 2.6.30 API change dnl # The 'struct dentry_operations' was constified in the dentry structure. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [ - AC_MSG_CHECKING([whether dentry uses const struct dentry_operations]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS], [ + ZFS_LINUX_TEST_SRC([dentry_operations_const], [ #include const struct dentry_operations test_d_op = { @@ -38,32 +135,17 @@ AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [ }; ],[ struct dentry d __attribute__ ((unused)); - d.d_op = &test_d_op; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1, - [dentry uses const struct dentry_operations]) - ],[ - AC_MSG_RESULT(no) ]) ]) -dnl # -dnl # 2.6.38 API change -dnl # Added d_set_d_op() helper function. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], - [AC_MSG_CHECKING([whether d_set_d_op() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - ], [ - d_set_d_op(NULL, NULL); - ], [d_set_d_op], [fs/dcache.c], [ +AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [ + AC_MSG_CHECKING([whether dentry uses const struct dentry_operations]) + ZFS_LINUX_TEST_RESULT([dentry_operations_const], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_SET_D_OP, 1, - [d_set_d_op() is available]) - ], [ + AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1, + [dentry uses const struct dentry_operations]) + ],[ AC_MSG_RESULT(no) ]) ]) @@ -72,17 +154,41 @@ dnl # dnl # 2.6.38 API change dnl # Added sb->s_d_op default dentry_operations member dnl # -AC_DEFUN([ZFS_AC_KERNEL_S_D_OP], - [AC_MSG_CHECKING([whether super_block has s_d_op]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_S_D_OP], [ + ZFS_LINUX_TEST_SRC([super_block_s_d_op], [ #include ],[ struct super_block sb __attribute__ ((unused)); sb.s_d_op = NULL; - ], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_S_D_OP], [ + AC_MSG_CHECKING([whether super_block has s_d_op]) + ZFS_LINUX_TEST_RESULT([super_block_s_d_op], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_S_D_OP, 1, [struct super_block has s_d_op]) ], [ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [ + 
ZFS_AC_KERNEL_SRC_D_MAKE_ROOT + ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS + ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES + ZFS_AC_KERNEL_SRC_D_SET_D_OP + ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA + ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS + ZFS_AC_KERNEL_SRC_S_D_OP +]) + +AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [ + ZFS_AC_KERNEL_D_MAKE_ROOT + ZFS_AC_KERNEL_D_OBTAIN_ALIAS + ZFS_AC_KERNEL_D_PRUNE_ALIASES + ZFS_AC_KERNEL_D_SET_D_OP + ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA + ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS + ZFS_AC_KERNEL_S_D_OP +]) diff --git a/config/kernel-dirty-inode.m4 b/config/kernel-dirty-inode.m4 index ffd87bb146d..dc7667fa488 100644 --- a/config/kernel-dirty-inode.m4 +++ b/config/kernel-dirty-inode.m4 @@ -4,9 +4,8 @@ dnl # The sops->dirty_inode() callbacks were updated to take a flags dnl # argument. This allows the greater control over whether the dnl # filesystem needs to push out a transaction or not. dnl # -AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS], [ - AC_MSG_CHECKING([whether sops->dirty_inode() wants flags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_DIRTY_INODE], [ + ZFS_LINUX_TEST_SRC([dirty_inode_with_flags], [ #include void dirty_inode(struct inode *a, int b) { return; } @@ -15,11 +14,15 @@ AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS], [ sops __attribute__ ((unused)) = { .dirty_inode = dirty_inode, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE], [ + AC_MSG_CHECKING([whether sops->dirty_inode() wants flags]) + ZFS_LINUX_TEST_RESULT([dirty_inode_with_flags], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_DIRTY_INODE_WITH_FLAGS, 1, - [sops->dirty_inode() wants flags]) + [sops->dirty_inode() wants flags]) ],[ AC_MSG_RESULT([no]) ]) diff --git a/config/kernel-discard-granularity.m4 b/config/kernel-discard-granularity.m4 index 2c677c90968..c830d9aa9fb 100644 --- a/config/kernel-discard-granularity.m4 +++ b/config/kernel-discard-granularity.m4 @@ -2,18 +2,21 @@ dnl # dnl # 2.6.33 API change dnl # Discard granularity and alignment restrictions may 
now be set. dnl # -AC_DEFUN([ZFS_AC_KERNEL_DISCARD_GRANULARITY], [ - AC_MSG_CHECKING([whether ql->discard_granularity is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY], [ + ZFS_LINUX_TEST_SRC([discard_granularity], [ #include ],[ struct queue_limits ql __attribute__ ((unused)); - ql.discard_granularity = 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_DISCARD_GRANULARITY], [ + AC_MSG_CHECKING([whether ql->discard_granularity is available]) + ZFS_LINUX_TEST_RESULT([discard_granularity], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_DISCARD_GRANULARITY, 1, - [ql->discard_granularity is available]) + [ql->discard_granularity is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-elevator-change.m4 b/config/kernel-elevator-change.m4 index eba252579bc..3aa7320406d 100644 --- a/config/kernel-elevator-change.m4 +++ b/config/kernel-elevator-change.m4 @@ -2,24 +2,25 @@ dnl # dnl # 2.6.36 API, exported elevator_change() symbol dnl # 4.12 API, removed elevator_change() symbol dnl # -AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [ - AC_MSG_CHECKING([whether elevator_change() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_ELEVATOR_CHANGE], [ + ZFS_LINUX_TEST_SRC([elevator_change], [ #include #include ],[ - int ret; struct request_queue *q = NULL; char *elevator = NULL; - ret = elevator_change(q, elevator); - ],[ + int error __attribute__ ((unused)) = + elevator_change(q, elevator); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [ + AC_MSG_CHECKING([whether elevator_change() is available]) + ZFS_LINUX_TEST_RESULT([elevator_change], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_ELEVATOR_CHANGE, 1, - [elevator_change() is available]) + [elevator_change() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-encode-fh-inode.m4 b/config/kernel-encode-fh-inode.m4 index 287f62a5eda..9d4ba5f0f61 100644 --- 
a/config/kernel-encode-fh-inode.m4 +++ b/config/kernel-encode-fh-inode.m4 @@ -4,20 +4,23 @@ dnl # torvalds/linux@b0b0382bb4904965a9e9fca77ad87514dfda0d1c changed the dnl # ->encode_fh() callback to pass the child inode and its parents inode dnl # rather than a dentry and a boolean saying whether we want the parent. dnl # -AC_DEFUN([ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE], [ - AC_MSG_CHECKING([whether eops->encode_fh() wants inode]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE], [ + ZFS_LINUX_TEST_SRC([export_operations_encode_fh], [ #include int encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { return 0; } static struct export_operations eops __attribute__ ((unused))={ .encode_fh = encode_fh, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE], [ + AC_MSG_CHECKING([whether eops->encode_fh() wants inode]) + ZFS_LINUX_TEST_RESULT([export_operations_encode_fh], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_ENCODE_FH_WITH_INODE, 1, - [eops->encode_fh() wants child and parent inodes]) + [eops->encode_fh() wants child and parent inodes]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-evict-inode.m4 b/config/kernel-evict-inode.m4 index 683cedb6d38..cd91c666946 100644 --- a/config/kernel-evict-inode.m4 +++ b/config/kernel-evict-inode.m4 @@ -3,16 +3,19 @@ dnl # 2.6.36 API change dnl # The sops->delete_inode() and sops->clear_inode() callbacks have dnl # replaced by a single sops->evict_inode() callback. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_EVICT_INODE], [ - AC_MSG_CHECKING([whether sops->evict_inode() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_EVICT_INODE], [ + ZFS_LINUX_TEST_SRC([evict_inode], [ #include void evict_inode (struct inode * t) { return; } static struct super_operations sops __attribute__ ((unused)) = { .evict_inode = evict_inode, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_EVICT_INODE], [ + AC_MSG_CHECKING([whether sops->evict_inode() exists]) + ZFS_LINUX_TEST_RESULT([evict_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_EVICT_INODE, 1, [sops->evict_inode() exists]) ],[ diff --git a/config/kernel-fallocate-pax.m4 b/config/kernel-fallocate-pax.m4 deleted file mode 100644 index e8948be176d..00000000000 --- a/config/kernel-fallocate-pax.m4 +++ /dev/null @@ -1,19 +0,0 @@ -dnl # -dnl # PaX Linux 2.6.38 - 3.x API -dnl # -AC_DEFUN([ZFS_AC_PAX_KERNEL_FILE_FALLOCATE], [ - AC_MSG_CHECKING([whether fops->fallocate() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - long (*fallocate) (struct file *, int, loff_t, loff_t) = NULL; - struct file_operations_no_const fops __attribute__ ((unused)) = { - .fallocate = fallocate, - }; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-fallocate.m4 b/config/kernel-fallocate.m4 index 5509064725e..302957a6c4b 100644 --- a/config/kernel-fallocate.m4 +++ b/config/kernel-fallocate.m4 @@ -1,9 +1,11 @@ dnl # -dnl # Linux 2.6.38 - 3.x API +dnl # The fallocate callback was moved from the inode_operations +dnl # structure to the file_operations structure. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_FILE_FALLOCATE], [ - AC_MSG_CHECKING([whether fops->fallocate() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [ + + dnl # Linux 2.6.38 - 3.x API + ZFS_LINUX_TEST_SRC([file_fallocate], [ #include long test_fallocate(struct file *file, int mode, @@ -13,21 +15,10 @@ AC_DEFUN([ZFS_AC_KERNEL_FILE_FALLOCATE], [ fops __attribute__ ((unused)) = { .fallocate = test_fallocate, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) + ], []) -dnl # -dnl # Linux 2.6.x - 2.6.37 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_FALLOCATE], [ - AC_MSG_CHECKING([whether iops->fallocate() exists]) - ZFS_LINUX_TRY_COMPILE([ + dnl # Linux 2.6.x - 2.6.37 API + ZFS_LINUX_TEST_SRC([inode_fallocate], [ #include long test_fallocate(struct inode *inode, int mode, @@ -37,20 +28,23 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_FALLOCATE], [ fops __attribute__ ((unused)) = { .fallocate = test_fallocate, }; + ], []) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [ + AC_MSG_CHECKING([whether fops->fallocate() exists]) + ZFS_LINUX_TEST_RESULT([file_fallocate], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists]) ],[ - ],[ + AC_MSG_RESULT(no) + ]) + + AC_MSG_CHECKING([whether iops->fallocate() exists]) + ZFS_LINUX_TEST_RESULT([inode_fallocate], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_FALLOCATE, 1, [fops->fallocate() exists]) ],[ AC_MSG_RESULT(no) ]) ]) - -dnl # -dnl # The fallocate callback was moved from the inode_operations -dnl # structure to the file_operations structure. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [ - ZFS_AC_KERNEL_FILE_FALLOCATE - ZFS_AC_KERNEL_INODE_FALLOCATE -]) diff --git a/config/kernel-file-dentry.m4 b/config/kernel-file-dentry.m4 index daf742ee1b0..9cb5869c382 100644 --- a/config/kernel-file-dentry.m4 +++ b/config/kernel-file-dentry.m4 @@ -4,14 +4,18 @@ dnl # struct access file->f_path.dentry was replaced by accessor function dnl # since fix torvalds/linux@4bacc9c9234c ("overlayfs: Make f_path always dnl # point to the overlay and f_inode to the underlay"). dnl # -AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [ - AC_MSG_CHECKING([whether file_dentry() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_DENTRY], [ + ZFS_LINUX_TEST_SRC([file_dentry], [ #include ],[ struct file *f = NULL; file_dentry(f); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [ + AC_MSG_CHECKING([whether file_dentry() is available]) + ZFS_LINUX_TEST_RESULT([file_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FILE_DENTRY, 1, [file_dentry() is available]) ],[ diff --git a/config/kernel-file-inode.m4 b/config/kernel-file-inode.m4 index 300188fa3a6..00a3621657a 100644 --- a/config/kernel-file-inode.m4 +++ b/config/kernel-file-inode.m4 @@ -3,14 +3,18 @@ dnl # 3.19 API change dnl # struct access f->f_dentry->d_inode was replaced by accessor function dnl # file_inode(f) dnl # -AC_DEFUN([ZFS_AC_KERNEL_FILE_INODE], [ - AC_MSG_CHECKING([whether file_inode() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_INODE], [ + ZFS_LINUX_TEST_SRC([file_inode], [ #include ],[ struct file *f = NULL; file_inode(f); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FILE_INODE], [ + AC_MSG_CHECKING([whether file_inode() is available]) + ZFS_LINUX_TEST_RESULT([file_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FILE_INODE, 1, [file_inode() is available]) ],[ diff --git a/config/kernel-fmode-t.m4 b/config/kernel-fmode-t.m4 index 4a23c391d32..bc0001b9ebc 100644 --- a/config/kernel-fmode-t.m4 +++ b/config/kernel-fmode-t.m4 @@ 
-2,16 +2,19 @@ dnl # dnl # 2.6.28 API change, dnl # check if fmode_t typedef is defined dnl # -AC_DEFUN([ZFS_AC_KERNEL_TYPE_FMODE_T], - [AC_MSG_CHECKING([whether kernel defines fmode_t]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FMODE_T], [ + ZFS_LINUX_TEST_SRC([type_fmode_t], [ #include ],[ fmode_t *ptr __attribute__ ((unused)); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FMODE_T], [ + AC_MSG_CHECKING([whether kernel defines fmode_t]) + ZFS_LINUX_TEST_RESULT([type_fmode_t], [ AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_FMODE_T, 1, - [kernel defines fmode_t]) + AC_DEFINE(HAVE_FMODE_T, 1, [kernel defines fmode_t]) ],[ AC_MSG_RESULT([no]) ]) diff --git a/config/kernel-follow-down-one.m4 b/config/kernel-follow-down-one.m4 index 63fa779d857..94e4aeb8d47 100644 --- a/config/kernel-follow-down-one.m4 +++ b/config/kernel-follow-down-one.m4 @@ -3,14 +3,18 @@ dnl # 2.6.38 API change dnl # follow_down() renamed follow_down_one(). The original follow_down() dnl # symbol still exists but will traverse down all the layers. dnl # -AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_DOWN_ONE], [ - AC_MSG_CHECKING([whether follow_down_one() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE], [ + ZFS_LINUX_TEST_SRC([follow_down_one], [ #include ],[ struct path *p = NULL; follow_down_one(p); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_DOWN_ONE], [ + AC_MSG_CHECKING([whether follow_down_one() is available]) + ZFS_LINUX_TEST_RESULT([follow_down_one], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FOLLOW_DOWN_ONE, 1, [follow_down_one() is available]) diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index 49316aab459..3c7933413d1 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -12,11 +12,10 @@ dnl # dnl # Pre-4.2: Use kernel_fpu_{begin,end}() dnl # HAVE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU dnl # -AC_DEFUN([ZFS_AC_KERNEL_FPU], [ - dnl # - dnl # N.B. 
The header check is performed before all other checks since - dnl # it depends on HAVE_KERNEL_FPU_API_HEADER being set in confdefs.h. - dnl # +dnl # N.B. The header check is performed before all other checks since it +dnl # depends on HAVE_KERNEL_FPU_API_HEADER being set in confdefs.h. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_FPU_HEADER], [ AC_MSG_CHECKING([whether fpu headers are available]) ZFS_LINUX_TRY_COMPILE([ #include @@ -29,13 +28,10 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ ],[ AC_MSG_RESULT(i387.h & xcr.h) ]) +]) - dnl # - dnl # Legacy kernel - dnl # - AC_MSG_CHECKING([whether kernel fpu is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ + ZFS_LINUX_TEST_SRC([kernel_fpu], [ #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include @@ -43,11 +39,68 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ #include #include #endif - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ + ], [ kernel_fpu_begin(); kernel_fpu_end(); - ], [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [ + ], [], [$ZFS_META_LICENSE]) + + ZFS_LINUX_TEST_SRC([__kernel_fpu], [ + #include + #ifdef HAVE_KERNEL_FPU_API_HEADER + #include + #else + #include + #include + #endif + ], [ + __kernel_fpu_begin(); + __kernel_fpu_end(); + ], [], [$ZFS_META_LICENSE]) + + ZFS_LINUX_TEST_SRC([fpu_internal], [ + #if defined(__x86_64) || defined(__x86_64__) || \ + defined(__i386) || defined(__i386__) + #if !defined(__x86) + #define __x86 + #endif + #endif + + #if !defined(__x86) + #error Unsupported architecture + #endif + + #include + #ifdef HAVE_KERNEL_FPU_API_HEADER + #include + #include + #else + #include + #include + #endif + + #if !defined(XSTATE_XSAVE) + #error XSTATE_XSAVE not defined + #endif + + #if !defined(XSTATE_XRESTORE) + #error XSTATE_XRESTORE not defined + #endif + ],[ + struct fpu *fpu = ¤t->thread.fpu; + union fpregs_state *st = &fpu->state; + struct fregs_state *fr __attribute__ ((unused)) = &st->fsave; + struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave; + struct 
xregs_state *xr __attribute__ ((unused)) = &st->xsave; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FPU], [ + dnl # + dnl # Legacy kernel + dnl # + AC_MSG_CHECKING([whether kernel fpu is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([kernel_fpu_license], + [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [ AC_MSG_RESULT(kernel_fpu_*) AC_DEFINE(HAVE_KERNEL_FPU, 1, [kernel has kernel_fpu_* functions]) @@ -57,67 +110,16 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ dnl # dnl # Linux 4.2 kernel dnl # - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - #include - #ifdef HAVE_KERNEL_FPU_API_HEADER - #include - #else - #include - #include - #endif - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - __kernel_fpu_begin(); - __kernel_fpu_end(); - ], [__kernel_fpu_begin], - [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL([__kernel_fpu_license], + [__kernel_fpu_begin], + [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [ AC_MSG_RESULT(__kernel_fpu_*) AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1, [kernel has __kernel_fpu_* functions]) AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) ],[ - ZFS_LINUX_TRY_COMPILE([ - #include - - #if defined(__x86_64) || defined(__x86_64__) || \ - defined(__i386) || defined(__i386__) - #if !defined(__x86) - #define __x86 - #endif - #endif - - #if !defined(__x86) - #error Unsupported architecture - #endif - - #include - #ifdef HAVE_KERNEL_FPU_API_HEADER - #include - #include - #else - #include - #include - #endif - - #if !defined(XSTATE_XSAVE) - #error XSTATE_XSAVE not defined - #endif - - #if !defined(XSTATE_XRESTORE) - #error XSTATE_XRESTORE not defined - #endif - ],[ - struct fpu *fpu = ¤t->thread.fpu; - union fpregs_state *st = &fpu->state; - struct fregs_state *fr __attribute__ ((unused)) = - &st->fsave; - struct fxregs_state *fxr __attribute__ ((unused)) = - &st->fxsave; - struct xregs_state *xr __attribute__ ((unused)) = - &st->xsave; - ], [ + ZFS_LINUX_TEST_RESULT([fpu_internal], [ AC_MSG_RESULT(internal) 
AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, [kernel fpu internal]) diff --git a/config/kernel-fst-mount.m4 b/config/kernel-fst-mount.m4 index a8ac50bdd5d..cec1ed4d6cd 100644 --- a/config/kernel-fst-mount.m4 +++ b/config/kernel-fst-mount.m4 @@ -3,9 +3,8 @@ dnl # 2.6.38 API change dnl # The .get_sb callback has been replaced by a .mount callback dnl # in the file_system_type structure. dnl # -AC_DEFUN([ZFS_AC_KERNEL_FST_MOUNT], [ - AC_MSG_CHECKING([whether fst->mount() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FST_MOUNT], [ + ZFS_LINUX_TEST_SRC([file_system_type_mount], [ #include static struct dentry * @@ -18,8 +17,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FST_MOUNT], [ static struct file_system_type fst __attribute__ ((unused)) = { .mount = mount, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FST_MOUNT], [ + AC_MSG_CHECKING([whether fst->mount() exists]) + ZFS_LINUX_TEST_RESULT([file_system_type_mount], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FST_MOUNT, 1, [fst->mount() exists]) ],[ diff --git a/config/kernel-fsync.m4 b/config/kernel-fsync.m4 index e1f2d68b9b1..0494e31ad11 100644 --- a/config/kernel-fsync.m4 +++ b/config/kernel-fsync.m4 @@ -1,8 +1,8 @@ dnl # -dnl # Linux 2.6.x - 2.6.34 API +dnl # Check file_operations->fsync interface. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITH_DENTRY], [ - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ + ZFS_LINUX_TEST_SRC([fsync_with_dentry], [ #include int test_fsync(struct file *f, struct dentry *dentry, int x) @@ -12,20 +12,9 @@ AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITH_DENTRY], [ fops __attribute__ ((unused)) = { .fsync = test_fsync, }; - ],[ - ],[ - AC_MSG_RESULT([dentry]) - AC_DEFINE(HAVE_FSYNC_WITH_DENTRY, 1, - [fops->fsync() with dentry]) - ],[ - ]) -]) + ],[]) -dnl # -dnl # Linux 2.6.35 - Linux 3.0 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITHOUT_DENTRY], [ - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([fsync_without_dentry], [ #include int test_fsync(struct file *f, int x) { return 0; } @@ -34,20 +23,9 @@ AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITHOUT_DENTRY], [ fops __attribute__ ((unused)) = { .fsync = test_fsync, }; - ],[ - ],[ - AC_MSG_RESULT([no dentry]) - AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1, - [fops->fsync() without dentry]) - ],[ - ]) -]) + ],[]) -dnl # -dnl # Linux 3.1 - 3.x API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_FSYNC_RANGE], [ - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([fsync_range], [ #include int test_fsync(struct file *f, loff_t a, loff_t b, int c) @@ -57,18 +35,43 @@ AC_DEFUN([ZFS_AC_KERNEL_FSYNC_RANGE], [ fops __attribute__ ((unused)) = { .fsync = test_fsync, }; - ],[ - ],[ - AC_MSG_RESULT([range]) - AC_DEFINE(HAVE_FSYNC_RANGE, 1, - [fops->fsync() with range]) - ],[ - ]) + ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_FSYNC], [ - AC_MSG_CHECKING([whether fops->fsync() wants]) - ZFS_AC_KERNEL_FSYNC_WITH_DENTRY - ZFS_AC_KERNEL_FSYNC_WITHOUT_DENTRY - ZFS_AC_KERNEL_FSYNC_RANGE + dnl # + dnl # Linux 2.6.x - 2.6.34 API + dnl # + AC_MSG_CHECKING([whether fops->fsync() wants dentry]) + ZFS_LINUX_TEST_RESULT([fsync_with_dentry], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_FSYNC_WITH_DENTRY, 1, + [fops->fsync() with dentry]) + ],[ + AC_MSG_RESULT([no]) + + dnl # + dnl # Linux 2.6.35 - Linux 3.0 API + dnl # + AC_MSG_CHECKING([whether fops->fsync() wants no 
dentry]) + ZFS_LINUX_TEST_RESULT([fsync_without_dentry], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1, + [fops->fsync() without dentry]) + ],[ + AC_MSG_RESULT([no]) + + dnl # + dnl # Linux 3.1 - 3.x API + dnl # + AC_MSG_CHECKING([whether fops->fsync() wants range]) + ZFS_LINUX_TEST_RESULT([fsync_range], [ + AC_MSG_RESULT([range]) + AC_DEFINE(HAVE_FSYNC_RANGE, 1, + [fops->fsync() with range]) + ],[ + ZFS_LINUX_TEST_ERROR([fops->fsync]) + ]) + ]) + ]) ]) diff --git a/config/kernel-generic_io_acct.m4 b/config/kernel-generic_io_acct.m4 index 0aa76216226..423b3e5a352 100644 --- a/config/kernel-generic_io_acct.m4 +++ b/config/kernel-generic_io_acct.m4 @@ -1,12 +1,8 @@ dnl # -dnl # 3.19 API addition +dnl # Check for generic io accounting interface. dnl # -dnl # torvalds/linux@394ffa503bc40e32d7f54a9b817264e81ce131b4 allows us to -dnl # increment iostat counters without generic_make_request(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG], [ - AC_MSG_CHECKING([whether 3 arg generic IO accounting symbols are available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ + ZFS_LINUX_TEST_SRC([generic_acct_3args], [ #include void (*generic_start_io_acct_f)(int, unsigned long, @@ -16,24 +12,9 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG], [ ], [ generic_start_io_acct(0, 0, NULL); generic_end_io_acct(0, NULL, 0); - ], [generic_start_io_acct], [block/bio.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, - [generic_start_io_acct()/generic_end_io_acct() available]) - ], [ - AC_MSG_RESULT(no) ]) -]) -dnl # -dnl # Linux 4.14 API, -dnl # -dnl # generic_start_io_acct/generic_end_io_acct now require request_queue to be -dnl # provided. 
No functional changes, but preparation for inflight accounting -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG], [ - AC_MSG_CHECKING([whether 4 arg generic IO accounting symbols are available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ + ZFS_LINUX_TEST_SRC([generic_acct_4args], [ #include void (*generic_start_io_acct_f)(struct request_queue *, int, @@ -43,11 +24,41 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG], [ ], [ generic_start_io_acct(NULL, 0, 0, NULL); generic_end_io_acct(NULL, 0, NULL, 0); - ], [generic_start_io_acct], [block/bio.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ + dnl # + dnl # 3.19 API addition + dnl # + dnl # torvalds/linux@394ffa50 allows us to increment iostat + dnl # counters without generic_make_request(). + dnl # + AC_MSG_CHECKING([whether generic IO accounting wants 3 args]) + ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], + [generic_start_io_acct], [block/bio.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, - [generic_start_io_acct()/generic_end_io_acct() 4 arg available]) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, + [generic_start_io_acct()/generic_end_io_acct() available]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # Linux 4.14 API, + dnl # + dnl # generic_start_io_acct/generic_end_io_acct now require + dnl # request_queue to be provided. No functional changes, + dnl # but preparation for inflight accounting. 
+ dnl # + AC_MSG_CHECKING([whether generic IO accounting wants 4 args]) + ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args], + [generic_start_io_acct], [block/bio.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, + [generic_start_io_acct()/generic_end_io_acct() ] + [4 arg available]) + ], [ + AC_MSG_RESULT(no) + ]) ]) ]) diff --git a/config/kernel-generic_readlink.m4 b/config/kernel-generic_readlink.m4 index 914431de4fd..a7a33b408ab 100644 --- a/config/kernel-generic_readlink.m4 +++ b/config/kernel-generic_readlink.m4 @@ -4,18 +4,21 @@ dnl # dnl # NULL inode_operations.readlink implies generic_readlink(), which dnl # has been made static. dnl # -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL], [ - AC_MSG_CHECKING([whether generic_readlink is global]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL], [ + ZFS_LINUX_TEST_SRC([generic_readlink_global], [ #include ],[ int i __attribute__ ((unused)); - i = generic_readlink(NULL, NULL, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL], [ + AC_MSG_CHECKING([whether generic_readlink is global]) + ZFS_LINUX_TEST_RESULT([generic_readlink_global], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_GENERIC_READLINK, 1, - [generic_readlink is global]) + [generic_readlink is global]) ],[ AC_MSG_RESULT([no]) ]) diff --git a/config/kernel-get-disk-and-module.m4 b/config/kernel-get-disk-and-module.m4 index 2a51a5af7dc..51cf7743cf0 100644 --- a/config/kernel-get-disk-and-module.m4 +++ b/config/kernel-get-disk-and-module.m4 @@ -2,14 +2,19 @@ dnl # dnl # 4.16 API change dnl # Verify if get_disk_and_module() symbol is available. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_AND_MODULE], - [AC_MSG_CHECKING([whether get_disk_and_module() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE], [ + ZFS_LINUX_TEST_SRC([get_disk_and_module], [ #include ], [ struct gendisk *disk = NULL; (void) get_disk_and_module(disk); - ], [get_disk_and_module], [block/genhd.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_AND_MODULE], [ + AC_MSG_CHECKING([whether get_disk_and_module() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([get_disk_and_module], + [get_disk_and_module], [block/genhd.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GET_DISK_AND_MODULE, 1, [get_disk_and_module() is available]) diff --git a/config/kernel-get-disk-ro.m4 b/config/kernel-get-disk-ro.m4 index 13ed81217ee..1e2abb475f9 100644 --- a/config/kernel-get-disk-ro.m4 +++ b/config/kernel-get-disk-ro.m4 @@ -1,21 +1,21 @@ dnl # dnl # 2.6.x API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_RO], [ - AC_MSG_CHECKING([whether get_disk_ro() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_RO], [ + ZFS_LINUX_TEST_SRC([get_disk_ro], [ #include ],[ struct gendisk *disk = NULL; (void) get_disk_ro(disk); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_RO], [ + AC_MSG_CHECKING([whether get_disk_ro() is available]) + ZFS_LINUX_TEST_RESULT([get_disk_ro], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_DISK_RO, 1, - [blk_disk_ro() is available]) + AC_DEFINE(HAVE_GET_DISK_RO, 1, [blk_disk_ro() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-get-link.m4 b/config/kernel-get-link.m4 index 3cda08c1b4d..e4f478e37c1 100644 --- a/config/kernel-get-link.m4 +++ b/config/kernel-get-link.m4 @@ -1,13 +1,29 @@ dnl # dnl # Supported get_link() interfaces checked newest to oldest. +dnl # Note this interface used to be named follow_link. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_LINK], [ - dnl # - dnl # 4.2 API change - dnl # - This kernel retired the nameidata structure. - dnl # - AC_MSG_CHECKING([whether iops->follow_link() passes cookie]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ + ZFS_LINUX_TEST_SRC([inode_operations_get_link], [ + #include + const char *get_link(struct dentry *de, struct inode *ip, + struct delayed_call *done) { return "symlink"; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .get_link = get_link, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([inode_operations_get_link_cookie], [ + #include + const char *get_link(struct dentry *de, struct + inode *ip, void **cookie) { return "symlink"; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .get_link = get_link, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([inode_operations_follow_link], [ #include const char *follow_link(struct dentry *de, void **cookie) { return "symlink"; } @@ -15,35 +31,17 @@ AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_LINK], [ iops __attribute__ ((unused)) = { .follow_link = follow_link, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FOLLOW_LINK_COOKIE, 1, - [iops->follow_link() cookie]) - ],[ - dnl # - dnl # 2.6.32 API - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether iops->follow_link() passes nameidata]) - ZFS_LINUX_TRY_COMPILE([ + ],[]) + + ZFS_LINUX_TEST_SRC([inode_operations_follow_link_nameidata], [ #include - void *follow_link(struct dentry *de, struct - nameidata *nd) { return (void *)NULL; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .follow_link = follow_link, - }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FOLLOW_LINK_NAMEIDATA, 1, - [iops->follow_link() nameidata]) - ],[ - AC_MSG_ERROR(no; please file a bug report) - ]) - ]) + void *follow_link(struct dentry *de, struct + nameidata *nd) { return (void *)NULL; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .follow_link = follow_link, + 
}; + ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [ @@ -53,20 +51,12 @@ AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [ dnl # used it to retire the put_link() interface. dnl # AC_MSG_CHECKING([whether iops->get_link() passes delayed]) - ZFS_LINUX_TRY_COMPILE([ - #include - const char *get_link(struct dentry *de, struct inode *ip, - struct delayed_call *done) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .get_link = get_link, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_LINK_DELAYED, 1, - [iops->get_link() delayed]) + AC_DEFINE(HAVE_GET_LINK_DELAYED, 1, [iops->get_link() delayed]) ],[ + AC_MSG_RESULT(no) + dnl # dnl # 4.5 API change dnl # The follow_link() interface has been replaced by @@ -74,27 +64,41 @@ AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [ dnl # - An inode is passed as a separate argument dnl # - When called in RCU mode a NULL dentry is passed. dnl # - AC_MSG_RESULT(no) AC_MSG_CHECKING([whether iops->get_link() passes cookie]) - ZFS_LINUX_TRY_COMPILE([ - #include - const char *get_link(struct dentry *de, struct - inode *ip, void **cookie) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .get_link = get_link, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([inode_operations_get_link_cookie], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GET_LINK_COOKIE, 1, [iops->get_link() cookie]) ],[ + AC_MSG_RESULT(no) + dnl # - dnl # Check for the follow_link APIs. + dnl # 4.2 API change + dnl # This kernel retired the nameidata structure. 
dnl # - AC_MSG_RESULT(no) - ZFS_AC_KERNEL_FOLLOW_LINK + AC_MSG_CHECKING( + [whether iops->follow_link() passes cookie]) + ZFS_LINUX_TEST_RESULT([inode_operations_follow_link], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FOLLOW_LINK_COOKIE, 1, + [iops->follow_link() cookie]) + ],[ + AC_MSG_RESULT(no) + + dnl # + dnl # 2.6.32 API + dnl # + AC_MSG_CHECKING( + [whether iops->follow_link() passes nameidata]) + ZFS_LINUX_TEST_RESULT( + [inode_operations_follow_link_nameidata],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FOLLOW_LINK_NAMEIDATA, 1, + [iops->follow_link() nameidata]) + ],[ + ZFS_LINUX_TEST_ERROR([get_link]) + ]) + ]) ]) ]) ]) diff --git a/config/kernel-global_page_state.m4 b/config/kernel-global_page_state.m4 index f4a40011f6f..a0cb9e2c827 100644 --- a/config/kernel-global_page_state.m4 +++ b/config/kernel-global_page_state.m4 @@ -4,16 +4,21 @@ dnl # dnl # 75ef71840539 mm, vmstat: add infrastructure for per-node vmstats dnl # 599d0c954f91 mm, vmscan: move LRU lists to node dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE], [ - AC_MSG_CHECKING([whether global_node_page_state() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE], [ + ZFS_LINUX_TEST_SRC([global_node_page_state], [ #include #include ],[ (void) global_node_page_state(0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE], [ + AC_MSG_CHECKING([whether global_node_page_state() exists]) + ZFS_LINUX_TEST_RESULT([global_node_page_state], [ AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1, [global_node_page_state() exists]) + AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1, + [global_node_page_state() exists]) ],[ AC_MSG_RESULT(no) ]) @@ -24,16 +29,21 @@ dnl # 4.14 API change dnl # dnl # c41f012ade0b mm: rename global_page_state to global_zone_page_state dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE], [ - AC_MSG_CHECKING([whether global_zone_page_state() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE], [ 
+ ZFS_LINUX_TEST_SRC([global_zone_page_state], [ #include #include ],[ (void) global_zone_page_state(0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE], [ + AC_MSG_CHECKING([whether global_zone_page_state() exists]) + ZFS_LINUX_TEST_RESULT([global_zone_page_state], [ AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GLOBAL_ZONE_PAGE_STATE, 1, [global_zone_page_state() exists]) + AC_DEFINE(ZFS_GLOBAL_ZONE_PAGE_STATE, 1, + [global_zone_page_state() exists]) ],[ AC_MSG_RESULT(no) ]) @@ -44,9 +54,11 @@ dnl # Create a define and autoconf variable for an enum member dnl # AC_DEFUN([ZFS_AC_KERNEL_ENUM_MEMBER], [ AC_MSG_CHECKING([whether enum $2 contains $1]) - AS_IF([AC_TRY_COMMAND("${srcdir}/scripts/enum-extract.pl" "$2" "$3" | egrep -qx $1)],[ + AS_IF([AC_TRY_COMMAND( + "${srcdir}/scripts/enum-extract.pl" "$2" "$3" | egrep -qx $1)],[ AC_MSG_RESULT([yes]) - AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1, [enum $2 contains $1]) + AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1, + [enum $2 contains $1]) m4_join([_], [ZFS_ENUM], m4_toupper($2), $1)=1 ],[ AC_MSG_RESULT([no]) @@ -59,8 +71,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR],[ AC_MSG_RESULT(no) AC_MSG_RESULT([$1 in either node_stat_item or zone_stat_item: $2]) - AC_MSG_RESULT([configure needs updating, see: config/kernel-global_page_state.m4]) - AC_MSG_FAILURE([SHUT 'ER DOWN CLANCY, SHE'S PUMPIN' MUD!]) + ZFS_LINUX_TEST_ERROR([global page state]) ]) AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [ @@ -75,10 +86,10 @@ AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [ ]) dnl # -dnl # Ensure the config tests are finding one and only one of each enum of interest +dnl # Ensure the config tests are finding one and only one of each enum. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY], [ - AC_MSG_CHECKING([global_page_state enums are sane]) + AC_MSG_CHECKING([whether global_page_state enums are sane]) ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_FILE_PAGES]) ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_INACTIVE_ANON]) @@ -88,6 +99,11 @@ AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY], [ AC_MSG_RESULT(yes) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE], [ + ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE + ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE +]) + dnl # dnl # enum members in which we're interested dnl # @@ -95,15 +111,23 @@ AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE], [ ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], + 
[zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY ]) diff --git a/config/kernel-group-info.m4 b/config/kernel-group-info.m4 index 849a1e246a4..0fee1d36d50 100644 --- a/config/kernel-group-info.m4 +++ b/config/kernel-group-info.m4 @@ -2,20 +2,21 @@ dnl # dnl # 4.9 API change dnl # group_info changed from 2d array via >blocks to 1d array via ->gid dnl # -AC_DEFUN([ZFS_AC_KERNEL_GROUP_INFO_GID], [ - AC_MSG_CHECKING([whether group_info->gid exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GROUP_INFO_GID], [ + ZFS_LINUX_TEST_SRC([group_info_gid], [ #include ],[ struct group_info *gi = groups_alloc(1); gi->gid[0] = KGIDT_INIT(0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GROUP_INFO_GID], [ + AC_MSG_CHECKING([whether group_info->gid exists]) + ZFS_LINUX_TEST_RESULT([group_info_gid], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-in-compat-syscall.m4 b/config/kernel-in-compat-syscall.m4 index 9fca9da20ea..baaac8c4fda 100644 --- a/config/kernel-in-compat-syscall.m4 +++ b/config/kernel-in-compat-syscall.m4 @@ -4,13 +4,17 @@ dnl # Added in_compat_syscall() which can be overridden on a per- dnl # architecture basis. Prior to this is_compat_task() was the dnl # provided interface. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ - AC_MSG_CHECKING([whether in_compat_syscall() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL], [ + ZFS_LINUX_TEST_SRC([in_compat_syscall], [ #include ],[ in_compat_syscall(); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ + AC_MSG_CHECKING([whether in_compat_syscall() is available]) + ZFS_LINUX_TEST_RESULT([in_compat_syscall], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IN_COMPAT_SYSCALL, 1, [in_compat_syscall() is available]) diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4 index f10e0b25108..48391d66f8b 100644 --- a/config/kernel-inode-getattr.m4 +++ b/config/kernel-inode-getattr.m4 @@ -2,9 +2,8 @@ dnl # dnl # Linux 4.11 API dnl # See torvalds/linux@a528d35 dnl # -AC_DEFUN([ZFS_AC_PATH_KERNEL_IOPS_GETATTR], [ - AC_MSG_CHECKING([whether iops->getattr() takes a path]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ + ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [ #include int test_getattr( @@ -16,24 +15,9 @@ AC_DEFUN([ZFS_AC_PATH_KERNEL_IOPS_GETATTR], [ iops __attribute__ ((unused)) = { .getattr = test_getattr, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1, - [iops->getattr() takes a path]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - + ],[]) - -dnl # -dnl # Linux 3.9 - 4.10 API -dnl # -AC_DEFUN([ZFS_AC_VFSMOUNT_KERNEL_IOPS_GETATTR], [ - AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([inode_operations_getattr_vfsmount], [ #include int test_getattr( @@ -45,23 +29,25 @@ AC_DEFUN([ZFS_AC_VFSMOUNT_KERNEL_IOPS_GETATTR], [ iops __attribute__ ((unused)) = { .getattr = test_getattr, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ + AC_MSG_CHECKING([whether iops->getattr() takes a path]) + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, - 
[iops->getattr() takes a vfsmount]) + AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1, + [iops->getattr() takes a path]) ],[ AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # The interface of the getattr callback from the inode_operations -dnl # structure changed. Also, the interface of the simple_getattr() -dnl # function provided by the kernel changed. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GETATTR], [ - ZFS_AC_PATH_KERNEL_IOPS_GETATTR - ZFS_AC_VFSMOUNT_KERNEL_IOPS_GETATTR + AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, + [iops->getattr() takes a vfsmount]) + ],[ + AC_MSG_RESULT(no) + ]) + ]) ]) diff --git a/config/kernel-inode-lock.m4 b/config/kernel-inode-lock.m4 index 8dee0142272..5eb04af7877 100644 --- a/config/kernel-inode-lock.m4 +++ b/config/kernel-inode-lock.m4 @@ -4,20 +4,21 @@ dnl # i_mutex is changed to i_rwsem. Instead of directly using dnl # i_mutex/i_rwsem, we should use inode_lock() and inode_lock_shared() dnl # We test inode_lock_shared because inode_lock is introduced earlier. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_LOCK], [ - AC_MSG_CHECKING([whether inode_lock_shared() exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_LOCK], [ + ZFS_LINUX_TEST_SRC([inode_lock], [ #include ],[ struct inode *inode = NULL; inode_lock_shared(inode); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_LOCK], [ + AC_MSG_CHECKING([whether inode_lock_shared() exists]) + ZFS_LINUX_TEST_RESULT([inode_lock], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_LOCK_SHARED, 1, [yes]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-inode-set-flags.m4 b/config/kernel-inode-set-flags.m4 index e0ad26796dd..133f666a951 100644 --- a/config/kernel-inode-set-flags.m4 +++ b/config/kernel-inode-set-flags.m4 @@ -2,14 +2,18 @@ dnl # dnl # 3.15 API change dnl # inode_set_flags introduced to set i_flags dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [ - AC_MSG_CHECKING([whether inode_set_flags() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS], [ + ZFS_LINUX_TEST_SRC([inode_set_flags], [ #include ],[ struct inode inode; inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [ + AC_MSG_CHECKING([whether inode_set_flags() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_flags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_SET_FLAGS, 1, [inode_set_flags() exists]) ],[ diff --git a/config/kernel-inode-set-iversion.m4 b/config/kernel-inode-set-iversion.m4 index 9a7d7890e54..dd415de324a 100644 --- a/config/kernel-inode-set-iversion.m4 +++ b/config/kernel-inode-set-iversion.m4 @@ -2,14 +2,18 @@ dnl # dnl # 4.16 API change dnl # inode_set_iversion introduced to set i_version dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_IVERSION], [ - AC_MSG_CHECKING([whether inode_set_iversion() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION], [ + ZFS_LINUX_TEST_SRC([inode_set_iversion], [ #include 
],[ struct inode inode; inode_set_iversion(&inode, 1); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_IVERSION], [ + AC_MSG_CHECKING([whether inode_set_iversion() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_iversion], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_SET_IVERSION, 1, [inode_set_iversion() exists]) diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4 index f5818411aa5..57e7f31fdcb 100644 --- a/config/kernel-inode-times.m4 +++ b/config/kernel-inode-times.m4 @@ -2,11 +2,8 @@ dnl # dnl # 4.18 API change dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64. dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ - AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ + ZFS_LINUX_TEST_SRC([inode_times], [ #include ],[ struct inode ip; @@ -14,12 +11,16 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ memset(&ip, 0, sizeof(ip)); ts = ip.i_mtime; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ + AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) + ZFS_LINUX_TEST_RESULT([inode_times], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1, [inode->i_*time's are timespec64]) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-insert-inode-locked.m4 b/config/kernel-insert-inode-locked.m4 index da141d180a9..4990399c3f1 100644 --- a/config/kernel-insert-inode-locked.m4 +++ b/config/kernel-insert-inode-locked.m4 @@ -2,16 +2,21 @@ dnl # dnl # 2.6.28 API change dnl # Added insert_inode_locked() helper function. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INSERT_INODE_LOCKED], - [AC_MSG_CHECKING([whether insert_inode_locked() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED], [ + ZFS_LINUX_TEST_SRC([insert_inode_locked], [ #include ], [ insert_inode_locked(NULL); - ], [insert_inode_locked], [fs/inode.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INSERT_INODE_LOCKED], [ + AC_MSG_CHECKING([whether insert_inode_locked() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([insert_inode_locked], + [insert_inode_locked], [fs/inode.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INSERT_INODE_LOCKED, 1, - [insert_inode_locked() is available]) + [insert_inode_locked() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-invalidate-bdev-args.m4 b/config/kernel-invalidate-bdev-args.m4 index 09c2ebf26e9..55a784dd91a 100644 --- a/config/kernel-invalidate-bdev-args.m4 +++ b/config/kernel-invalidate-bdev-args.m4 @@ -2,17 +2,21 @@ dnl # dnl # 2.6.22 API change dnl # Unused destroy_dirty_buffers arg removed from prototype. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS], [ - AC_MSG_CHECKING([whether invalidate_bdev() wants 1 arg]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INVALIDATE_BDEV], [ + ZFS_LINUX_TEST_SRC([invalidate_bdev], [ #include ],[ struct block_device *bdev = NULL; invalidate_bdev(bdev); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INVALIDATE_BDEV], [ + AC_MSG_CHECKING([whether invalidate_bdev() wants 1 arg]) + ZFS_LINUX_TEST_RESULT([invalidate_bdev], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_INVALIDATE_BDEV, 1, - [invalidate_bdev() wants 1 arg]) + [invalidate_bdev() wants 1 arg]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-is_owner_or_cap.m4 b/config/kernel-is_owner_or_cap.m4 index da07e58dda3..ab80724091a 100644 --- a/config/kernel-is_owner_or_cap.m4 +++ b/config/kernel-is_owner_or_cap.m4 @@ -4,33 +4,40 @@ dnl # The is_owner_or_cap() macro was renamed to inode_owner_or_capable(), dnl # This is used for permission checks in the xattr and file attribute call dnl # paths. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [ - AC_MSG_CHECKING([whether inode_owner_or_capable() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [ + ZFS_LINUX_TEST_SRC([inode_owner_or_capable], [ #include ],[ struct inode *ip = NULL; (void) inode_owner_or_capable(ip); + ]) + + + ZFS_LINUX_TEST_SRC([is_owner_or_cap], [ + #include + #include ],[ + struct inode *ip = NULL; + (void) is_owner_or_cap(ip); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [ + AC_MSG_CHECKING([whether inode_owner_or_capable() exists]) + ZFS_LINUX_TEST_RESULT([inode_owner_or_capable], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE, 1, [inode_owner_or_capable() exists]) ],[ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether is_owner_or_cap() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - struct inode *ip = NULL; - (void) is_owner_or_cap(ip); - ],[ + + ZFS_LINUX_TEST_RESULT([is_owner_or_cap], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IS_OWNER_OR_CAP, 1, [is_owner_or_cap() exists]) ],[ - AC_MSG_ERROR(no - Please file a bug report at - https://github.com/zfsonlinux/zfs/issues/new) + ZFS_LINUX_TEST_ERROR([capability]) ]) ]) ]) diff --git a/config/kernel-kmap-atomic-args.m4 b/config/kernel-kmap-atomic-args.m4 index beb1692e721..d09e93d7ffe 100644 --- a/config/kernel-kmap-atomic-args.m4 +++ b/config/kernel-kmap-atomic-args.m4 @@ -3,17 +3,21 @@ dnl # 2.6.37 API change dnl # kmap_atomic changed from assigning hard-coded named slot to using dnl # push/pop based dynamical allocation. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS], [ - AC_MSG_CHECKING([whether kmap_atomic wants 1 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS], [ + ZFS_LINUX_TEST_SRC([kmap_atomic], [ #include ],[ struct page page; kmap_atomic(&page); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS], [ + AC_MSG_CHECKING([whether kmap_atomic wants 1 args]) + ZFS_LINUX_TEST_RESULT([kmap_atomic], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_KMAP_ATOMIC, 1, - [kmap_atomic wants 1 args]) + [kmap_atomic wants 1 args]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-kmem-cache.m4 b/config/kernel-kmem-cache.m4 index 21cc53d3493..7576e6cfd85 100644 --- a/config/kernel-kmem-cache.m4 +++ b/config/kernel-kmem-cache.m4 @@ -5,30 +5,36 @@ dnl # private allocation flags which are applied when allocating a new slab dnl # in kmem_getpages(). Unfortunately there is no public API for setting dnl # non-default flags. dnl # -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS], [ - AC_MSG_CHECKING([whether struct kmem_cache has allocflags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_ALLOCFLAGS], [ + ZFS_LINUX_TEST_SRC([kmem_cache_allocflags], [ #include ],[ struct kmem_cache cachep __attribute__ ((unused)); cachep.allocflags = GFP_KERNEL; + ]) + + ZFS_LINUX_TEST_SRC([kmem_cache_gfpflags], [ + #include ],[ + struct kmem_cache cachep __attribute__ ((unused)); + cachep.gfpflags = GFP_KERNEL; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS], [ + AC_MSG_CHECKING([whether struct kmem_cache has allocflags]) + ZFS_LINUX_TEST_RESULT([kmem_cache_allocflags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KMEM_CACHE_ALLOCFLAGS, 1, - [struct kmem_cache has allocflags]) + [struct kmem_cache has allocflags]) ],[ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether struct kmem_cache has gfpflags]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct kmem_cache cachep __attribute__ ((unused)); - cachep.gfpflags = GFP_KERNEL; - ],[ + 
ZFS_LINUX_TEST_RESULT([kmem_cache_gfpflags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KMEM_CACHE_GFPFLAGS, 1, - [struct kmem_cache has gfpflags]) + [struct kmem_cache has gfpflags]) ],[ AC_MSG_RESULT(no) ]) @@ -40,16 +46,10 @@ dnl # grsecurity API change, dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by dnl # kmem_cache_create_usercopy(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ - AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY], [ + ZFS_LINUX_TEST_SRC([kmem_cache_create_usercopy], [ #include - static void ctor(void *foo) - { - // fake ctor - } + static void ctor(void *foo) { /* fake ctor */ } ],[ struct kmem_cache *skc_linux_cache; const char *name = "test"; @@ -60,13 +60,27 @@ AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ size_t usersize = size - useroffset; skc_linux_cache = kmem_cache_create_usercopy( - name, size, align, flags, useroffset, usersize, ctor); - ],[ + name, size, align, flags, useroffset, usersize, ctor); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ + AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists]) + ZFS_LINUX_TEST_RESULT([kmem_cache_create_usercopy], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KMEM_CACHE_CREATE_USERCOPY, 1, - [kmem_cache_create_usercopy() exists]) + [kmem_cache_create_usercopy() exists]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE], [ + ZFS_AC_KERNEL_SRC_KMEM_CACHE_ALLOCFLAGS + ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE], [ + ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS + ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY ]) diff --git a/config/kernel-kstrtoul.m4 b/config/kernel-kstrtoul.m4 index 5530e0e2d85..ef3c9843cce 100644 --- a/config/kernel-kstrtoul.m4 +++ b/config/kernel-kstrtoul.m4 @@ -1,18 +1,20 @@ dnl # dnl # 2.6.39 API 
change dnl # -dnl # 33ee3b2e2eb9 kstrto*: converting strings to integers done (hopefully) right -dnl # dnl # If kstrtoul() doesn't exist, fallback to use strict_strtoul() which has dnl # existed since 2.6.25. dnl # -AC_DEFUN([ZFS_AC_KERNEL_KSTRTOUL], [ - AC_MSG_CHECKING([whether kstrtoul() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KSTRTOUL], [ + ZFS_LINUX_TEST_SRC([kstrtoul], [ #include ],[ int ret __attribute__ ((unused)) = kstrtoul(NULL, 10, NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KSTRTOUL], [ + AC_MSG_CHECKING([whether kstrtoul() exists]) + ZFS_LINUX_TEST_RESULT([kstrtoul], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KSTRTOUL, 1, [kstrtoul() exists]) ],[ diff --git a/config/kernel-ktime_get_coarse_real_ts64.m4 b/config/kernel-ktime_get_coarse_real_ts64.m4 index d6be8c4185a..28492bf04bc 100644 --- a/config/kernel-ktime_get_coarse_real_ts64.m4 +++ b/config/kernel-ktime_get_coarse_real_ts64.m4 @@ -2,16 +2,21 @@ dnl # dnl # 4.18: ktime_get_coarse_real_ts64() added. Use it in place of dnl # current_kernel_time64(). 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], - [AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64], [ + ZFS_LINUX_TEST_SRC([ktime_get_coarse_real_ts64], [ #include ], [ struct timespec64 ts; ktime_get_coarse_real_ts64(&ts); - ], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], [ + AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) + ZFS_LINUX_TEST_RESULT([ktime_get_coarse_real_ts64], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, [ktime_get_coarse_real_ts64() exists]) + AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, + [ktime_get_coarse_real_ts64() exists]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-kuid-helpers.m4 b/config/kernel-kuid-helpers.m4 index 60713b9d313..4bc4e039d8c 100644 --- a/config/kernel-kuid-helpers.m4 +++ b/config/kernel-kuid-helpers.m4 @@ -5,14 +5,18 @@ dnl # became necessary to go through one more level of indirection dnl # when dealing with uid/gid - namely the kuid type. dnl # dnl # -AC_DEFUN([ZFS_AC_KERNEL_KUID_HELPERS], [ - AC_MSG_CHECKING([whether i_(uid|gid)_(read|write) exist]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KUID_HELPERS], [ + ZFS_LINUX_TEST_SRC([i_uid_read], [ #include ],[ struct inode *ip = NULL; (void) i_uid_read(ip); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KUID_HELPERS], [ + AC_MSG_CHECKING([whether i_(uid|gid)_(read|write) exist]) + ZFS_LINUX_TEST_RESULT([i_uid_read], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KUID_HELPERS, 1, [i_(uid|gid)_(read|write) exist]) diff --git a/config/kernel-kuidgid.m4 b/config/kernel-kuidgid.m4 index 82685d26369..15bf98154e8 100644 --- a/config/kernel-kuidgid.m4 +++ b/config/kernel-kuidgid.m4 @@ -3,20 +3,26 @@ dnl # User namespaces, use kuid_t in place of uid_t dnl # where available. 
Not strictly a user namespaces thing dnl # but it should prevent surprises dnl # -AC_DEFUN([ZFS_AC_KERNEL_KUIDGID_T], [ - AC_MSG_CHECKING([whether kuid_t/kgid_t is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KUIDGID_T], [ + ZFS_LINUX_TEST_SRC([kuidgid_t_init], [ #include ], [ kuid_t userid __attribute__ ((unused)) = KUIDT_INIT(0); kgid_t groupid __attribute__ ((unused)) = KGIDT_INIT(0); - ],[ - ZFS_LINUX_TRY_COMPILE([ - #include - ], [ - kuid_t userid __attribute__ ((unused)) = 0; - kgid_t groupid __attribute__ ((unused)) = 0; - ],[ + ]) + + ZFS_LINUX_TEST_SRC([kuidgid_t], [ + #include + ], [ + kuid_t userid __attribute__ ((unused)) = 0; + kgid_t groupid __attribute__ ((unused)) = 0; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KUIDGID_T], [ + AC_MSG_CHECKING([whether kuid_t/kgid_t is available]) + ZFS_LINUX_TEST_RESULT([kuidgid_t_init], [ + ZFS_LINUX_TEST_RESULT([kuidgid_t], [ AC_MSG_RESULT(yes; optional) ],[ AC_MSG_RESULT(yes; mandatory) diff --git a/config/kernel-lookup-bdev.m4 b/config/kernel-lookup-bdev.m4 index abbf55d9bb3..72b4993e148 100644 --- a/config/kernel-lookup-bdev.m4 +++ b/config/kernel-lookup-bdev.m4 @@ -2,23 +2,33 @@ dnl # dnl # 2.6.27, lookup_bdev() was exported. dnl # 4.4.0-6.21 - x.y on Ubuntu, lookup_bdev() takes 2 arguments. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], - [AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_BDEV], [ + ZFS_LINUX_TEST_SRC([lookup_bdev_1arg], [ #include ], [ lookup_bdev(NULL); - ], [lookup_bdev], [fs/block_dev.c], [ + ]) + + ZFS_LINUX_TEST_SRC([lookup_bdev_2args], [ + #include + ], [ + lookup_bdev(NULL, FMODE_READ); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], [ + AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg]) + ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_1arg], + [lookup_bdev], [fs/block_dev.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1, [lookup_bdev() wants 1 arg]) + AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1, + [lookup_bdev() wants 1 arg]) ], [ AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether lookup_bdev() wants 2 args]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - ], [ - lookup_bdev(NULL, FMODE_READ); - ], [lookup_bdev], [fs/block_dev.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_2args], + [lookup_bdev], [fs/block_dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_LOOKUP_BDEV, 1, [lookup_bdev() wants 2 args]) @@ -26,4 +36,4 @@ AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], AC_MSG_RESULT(no) ]) ]) -]) \ No newline at end of file +]) diff --git a/config/kernel-lookup-nameidata.m4 b/config/kernel-lookup-nameidata.m4 index 5453be5e8e3..865b8aff851 100644 --- a/config/kernel-lookup-nameidata.m4 +++ b/config/kernel-lookup-nameidata.m4 @@ -1,9 +1,8 @@ dnl # dnl # 3.6 API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [ - AC_MSG_CHECKING([whether iops->lookup() passes nameidata]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_NAMEIDATA], [ + ZFS_LINUX_TEST_SRC([lookup_nameidata], [ #include #include @@ -15,11 +14,15 @@ AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [ __attribute__ ((unused)) = { .lookup = inode_lookup, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [ + AC_MSG_CHECKING([whether iops->lookup() passes nameidata]) + 
ZFS_LINUX_TEST_RESULT([lookup_nameidata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_LOOKUP_NAMEIDATA, 1, - [iops->lookup() passes nameidata]) + [iops->lookup() passes nameidata]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-lseek-execute.m4 b/config/kernel-lseek-execute.m4 index 8c4032b92c6..652f611f8da 100644 --- a/config/kernel-lseek-execute.m4 +++ b/config/kernel-lseek-execute.m4 @@ -2,9 +2,8 @@ dnl # dnl # 3.11 API change dnl # lseek_execute helper exported dnl # -AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], - [AC_MSG_CHECKING([whether lseek_execute() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE], [ + ZFS_LINUX_TEST_SRC([lseek_execute], [ #include ], [ struct file *fp __attribute__ ((unused)) = NULL; @@ -13,10 +12,15 @@ AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], loff_t maxsize __attribute__ ((unused)) = 0; lseek_execute(fp, ip, offset, maxsize); - ], [lseek_exclusive], [fs/read_write.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], [ + AC_MSG_CHECKING([whether lseek_execute() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([lseek_execute], + [lseek_exclusive], [fs/read_write.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_LSEEK_EXECUTE, 1, - [lseek_execute() is available]) + AC_DEFINE(HAVE_LSEEK_EXECUTE, 1, [lseek_execute() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 new file mode 100644 index 00000000000..86339aa0450 --- /dev/null +++ b/config/kernel-make-request-fn.m4 @@ -0,0 +1,77 @@ +dnl # +dnl # Check for make_request_fn interface. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ + ZFS_LINUX_TEST_SRC([make_request_fn_int], [ + #include + int make_request(struct request_queue *q, + struct bio *bio) { return (0); } + ],[ + blk_queue_make_request(NULL, &make_request); + ]) + + ZFS_LINUX_TEST_SRC([make_request_fn_void], [ + #include + void make_request(struct request_queue *q, + struct bio *bio) { return; } + ],[ + blk_queue_make_request(NULL, &make_request); + ]) + + ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [ + #include + blk_qc_t make_request(struct request_queue *q, + struct bio *bio) { return (BLK_QC_T_NONE); } + ],[ + blk_queue_make_request(NULL, &make_request); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ + dnl # + dnl # Legacy API + dnl # make_request_fn returns int. + dnl # + AC_MSG_CHECKING([whether make_request_fn() returns int]) + ZFS_LINUX_TEST_RESULT([make_request_fn_int], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, int, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1, + [Noting that make_request_fn() returns int]) + ],[ + AC_MSG_RESULT(no) + + dnl # + dnl # Linux 3.2 API Change + dnl # make_request_fn returns void. + dnl # + AC_MSG_CHECKING([whether make_request_fn() returns void]) + ZFS_LINUX_TEST_RESULT([make_request_fn_void], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, void, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1, + [Noting that make_request_fn() returns void]) + ],[ + AC_MSG_RESULT(no) + + dnl # + dnl # Linux 4.4 API Change + dnl # make_request_fn returns blk_qc_t. 
+ dnl # + AC_MSG_CHECKING( + [whether make_request_fn() returns blk_qc_t]) + ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, + [Noting that make_request_fn() ] + [returns blk_qc_t]) + ],[ + ZFS_LINUX_TEST_ERROR([make_request_fn]) + ]) + ]) + ]) +]) diff --git a/config/kernel-misc-minor.m4 b/config/kernel-misc-minor.m4 index a020d2ebca0..20fe2cd2f3c 100644 --- a/config/kernel-misc-minor.m4 +++ b/config/kernel-misc-minor.m4 @@ -6,7 +6,7 @@ dnl # number. Start with a large known available unreserved minor and work dnl # our way down to lower value if a collision is detected. dnl # AC_DEFUN([ZFS_AC_KERNEL_MISC_MINOR], [ - AC_MSG_CHECKING([for available /dev/zfs minor]) + AC_MSG_CHECKING([whether /dev/zfs minor is available]) for i in $(seq 249 -1 200); do if ! grep -q "^#define\s\+.*_MINOR\s\+.*$i" \ diff --git a/config/kernel-mk-request-fn.m4 b/config/kernel-mk-request-fn.m4 deleted file mode 100644 index 57eebe23de5..00000000000 --- a/config/kernel-mk-request-fn.m4 +++ /dev/null @@ -1,65 +0,0 @@ -dnl # -dnl # Linux 3.2 API Change -dnl # make_request_fn returns void instead of int. -dnl # -dnl # Linux 4.4 API Change -dnl # make_request_fn returns blk_qc_t. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ - AC_MSG_CHECKING([whether make_request_fn() returns int]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int make_request(struct request_queue *q, struct bio *bio) - { - return (0); - } - ],[ - blk_queue_make_request(NULL, &make_request); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, int, - [make_request_fn() returns int]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1, - [Noting that make_request_fn() returns int]) - ],[ - AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether make_request_fn() returns void]) - ZFS_LINUX_TRY_COMPILE([ - #include - - void make_request(struct request_queue *q, struct bio *bio) - { - return; - } - ],[ - blk_queue_make_request(NULL, &make_request); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, void, - [make_request_fn() returns void]) - ],[ - AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether make_request_fn() returns blk_qc_t]) - ZFS_LINUX_TRY_COMPILE([ - #include - - blk_qc_t make_request(struct request_queue *q, struct bio *bio) - { - return (BLK_QC_T_NONE); - } - ],[ - blk_queue_make_request(NULL, &make_request); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, - [make_request_fn() returns blk_qc_t]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, - [Noting that make_request_fn() returns blk_qc_t]) - ],[ - AC_MSG_ERROR(no - Please file a bug report at - https://github.com/zfsonlinux/zfs/issues/new) - ]) - ]) - ]) -]) diff --git a/config/kernel-mkdir-umode-t.m4 b/config/kernel-mkdir-umode-t.m4 index 1c9fa9be3ce..f4dde29a37e 100644 --- a/config/kernel-mkdir-umode-t.m4 +++ b/config/kernel-mkdir-umode-t.m4 @@ -6,9 +6,8 @@ dnl # would also change all three prototypes. However, if it turns out that dnl # some distribution doesn't backport the whole thing this could be dnl # broken apart into three separate checks. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ - AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T], [ + ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [ #include int mkdir(struct inode *inode, struct dentry *dentry, @@ -18,8 +17,12 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ iops __attribute__ ((unused)) = { .mkdir = mkdir, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ + AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t]) + ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_MKDIR_UMODE_T, 1, [iops->create()/mkdir()/mknod() take umode_t]) diff --git a/config/kernel-mod-param.m4 b/config/kernel-mod-param.m4 index b72be684a44..e00f19d61e7 100644 --- a/config/kernel-mod-param.m4 +++ b/config/kernel-mod-param.m4 @@ -2,9 +2,8 @@ dnl # dnl # Grsecurity kernel API change dnl # constified parameters of module_param_call() methods dnl # -AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [ - AC_MSG_CHECKING([whether module_param_call() is hardened]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST], [ + ZFS_LINUX_TEST_SRC([module_param_call], [ #include #include @@ -19,8 +18,12 @@ AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [ } module_param_call(p, param_set, param_get, NULL, 0644); - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [ + AC_MSG_CHECKING([whether module_param_call() is hardened]) + ZFS_LINUX_TEST_RESULT([module_param_call], [ AC_MSG_RESULT(yes) AC_DEFINE(MODULE_PARAM_CALL_CONST, 1, [hardened module_param_call]) diff --git a/config/kernel-objtool.m4 b/config/kernel-objtool.m4 index 467329b2541..bf60e786921 100644 --- a/config/kernel-objtool.m4 +++ b/config/kernel-objtool.m4 @@ -1,41 +1,44 @@ dnl # -dnl # 4.6 API for compile-time stack validation +dnl # Check for objtool support. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_OBJTOOL], [ - AC_MSG_CHECKING([for compile-time stack validation (objtool)]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_OBJTOOL], [ + + dnl # 4.6 API for compile-time stack validation + ZFS_LINUX_TEST_SRC([objtool], [ #undef __ASSEMBLY__ #include ],[ #if !defined(FRAME_BEGIN) CTASSERT(1); #endif - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_OBJTOOL, 1, - [kernel does stack verification]) - - ZFS_AC_KERNEL_STACK_FRAME_NON_STANDARD - ],[ - AC_MSG_RESULT(no) ]) -]) -dnl # -dnl # 4.6 API added STACK_FRAME_NON_STANDARD macro -dnl # -AC_DEFUN([ZFS_AC_KERNEL_STACK_FRAME_NON_STANDARD], [ - AC_MSG_CHECKING([whether STACK_FRAME_NON_STANDARD is defined]) - ZFS_LINUX_TRY_COMPILE([ + dnl # 4.6 API added STACK_FRAME_NON_STANDARD macro + ZFS_LINUX_TEST_SRC([stack_frame_non_standard], [ #include ],[ #if !defined(STACK_FRAME_NON_STANDARD) CTASSERT(1); #endif - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_OBJTOOL], [ + AC_MSG_CHECKING( + [whether compile-time stack validation (objtool) is available]) + ZFS_LINUX_TEST_RESULT([objtool], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STACK_FRAME_NON_STANDARD, 1, - [STACK_FRAME_NON_STANDARD is defined]) + AC_DEFINE(HAVE_KERNEL_OBJTOOL, 1, + [kernel does stack verification]) + + AC_MSG_CHECKING([whether STACK_FRAME_NON_STANDARD is defined]) + ZFS_LINUX_TEST_RESULT([stack_frame_non_standard], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STACK_FRAME_NON_STANDARD, 1, + [STACK_FRAME_NON_STANDARD is defined]) + ],[ + AC_MSG_RESULT(no) + ]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-open-bdev-exclusive.m4 b/config/kernel-open-bdev-exclusive.m4 index 0661315a610..2e46b8876a4 100644 --- a/config/kernel-open-bdev-exclusive.m4 +++ b/config/kernel-open-bdev-exclusive.m4 @@ -2,16 +2,21 @@ dnl # dnl # 2.6.28 API change dnl # open/close_bdev_excl() renamed to open/close_bdev_exclusive() dnl # -AC_DEFUN([ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE], - [AC_MSG_CHECKING([whether open_bdev_exclusive() is available]) - 
ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_OPEN_EXCLUSIVE], [ + ZFS_LINUX_TEST_SRC([open_bdev_exclusive], [ #include ], [ open_bdev_exclusive(NULL, 0, NULL); - ], [open_bdev_exclusive], [fs/block_dev.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDEV_OPEN_EXCLUSIVE], [ + AC_MSG_CHECKING([whether open_bdev_exclusive() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([open_bdev_exclusive], + [open_bdev_exclusive], [fs/block_dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_OPEN_BDEV_EXCLUSIVE, 1, - [open_bdev_exclusive() is available]) + [open_bdev_exclusive() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-pde-data.m4 b/config/kernel-pde-data.m4 index 8aa4c2204e8..928c5ef0d88 100644 --- a/config/kernel-pde-data.m4 +++ b/config/kernel-pde-data.m4 @@ -2,15 +2,19 @@ dnl # dnl # 3.10 API change, dnl # PDE is replaced by PDE_DATA dnl # -AC_DEFUN([ZFS_AC_KERNEL_PDE_DATA], [ - AC_MSG_CHECKING([whether PDE_DATA() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_PDE_DATA], [ + ZFS_LINUX_TEST_SRC([pde_data], [ #include ], [ PDE_DATA(NULL); - ], [PDE_DATA], [], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PDE_DATA], [ + AC_MSG_CHECKING([whether PDE_DATA() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([pde_data], [PDE_DATA], [], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PDE_DATA, 1, [yes]) + AC_DEFINE(HAVE_PDE_DATA, 1, [PDE_DATA is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-put-link.m4 b/config/kernel-put-link.m4 index a0bb36ef277..f03df9e99bf 100644 --- a/config/kernel-put-link.m4 +++ b/config/kernel-put-link.m4 @@ -1,17 +1,35 @@ dnl # dnl # Supported symlink APIs dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ + ZFS_LINUX_TEST_SRC([put_link_cookie], [ + #include + void put_link(struct inode *ip, void *cookie) + { return; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .put_link = put_link, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([put_link_nameidata], [ + #include + void put_link(struct 
dentry *de, struct + nameidata *nd, void *ptr) { return; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .put_link = put_link, + }; + ],[]) +]) + AC_DEFUN([ZFS_AC_KERNEL_PUT_LINK], [ dnl # dnl # 4.5 API change dnl # get_link() uses delayed done, there is no put_link() interface. + dnl # This check intially uses the inode_operations_get_link result dnl # - ZFS_LINUX_TRY_COMPILE([ - #if !defined(HAVE_GET_LINK_DELAYED) - #error "Expecting get_link() delayed done" - #endif - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [ AC_DEFINE(HAVE_PUT_LINK_DELAYED, 1, [iops->put_link() delayed]) ],[ dnl # @@ -19,41 +37,24 @@ AC_DEFUN([ZFS_AC_KERNEL_PUT_LINK], [ dnl # This kernel retired the nameidata structure. dnl # AC_MSG_CHECKING([whether iops->put_link() passes cookie]) - ZFS_LINUX_TRY_COMPILE([ - #include - void put_link(struct inode *ip, void *cookie) - { return; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .put_link = put_link, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([put_link_cookie], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PUT_LINK_COOKIE, 1, [iops->put_link() cookie]) ],[ + AC_MSG_RESULT(no) + dnl # dnl # 2.6.32 API dnl # - AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether iops->put_link() passes nameidata]) - ZFS_LINUX_TRY_COMPILE([ - #include - void put_link(struct dentry *de, struct - nameidata *nd, void *ptr) { return; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .put_link = put_link, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([put_link_nameidata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PUT_LINK_NAMEIDATA, 1, [iops->put_link() nameidata]) ],[ - AC_MSG_ERROR(no; please file a bug report) + ZFS_LINUX_TEST_ERROR([put_link]) ]) ]) ]) diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index 9f894fb4db1..f707391539d 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -3,9 +3,8 @@ dnl # 4.9 API change, dnl # iops->rename2() merged into iops->rename(), and 
iops->rename() now wants dnl # flags. dnl # -AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ - AC_MSG_CHECKING([whether iops->rename() wants flags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [ + ZFS_LINUX_TEST_SRC([inode_operations_rename], [ #include int rename_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, @@ -15,10 +14,15 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ iops __attribute__ ((unused)) = { .rename = rename_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, [iops->rename() wants flags]) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-rw.m4 b/config/kernel-rw.m4 index 1c8a265e0ae..85b47d5c6fc 100644 --- a/config/kernel-rw.m4 +++ b/config/kernel-rw.m4 @@ -3,11 +3,8 @@ dnl # 4.14 API change dnl # kernel_write() which was introduced in 3.9 was updated to take dnl # the offset as a pointer which is needed by vn_rdwr(). 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ - AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITE], [ + ZFS_LINUX_TEST_SRC([kernel_write], [ #include ],[ struct file *file = NULL; @@ -17,14 +14,18 @@ AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ ssize_t ret; ret = kernel_write(file, buf, count, pos); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ + AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer]) + ZFS_LINUX_TEST_RESULT([kernel_write], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1, [kernel_write() take loff_t pointer]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # @@ -32,11 +33,8 @@ dnl # 4.14 API change dnl # kernel_read() which has existed for forever was updated to take dnl # the offset as a pointer which is needed by vn_rdwr(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_READ], [ - AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_READ], [ + ZFS_LINUX_TEST_SRC([kernel_read], [ #include ],[ struct file *file = NULL; @@ -46,12 +44,26 @@ AC_DEFUN([ZFS_AC_KERNEL_READ], [ ssize_t ret; ret = kernel_read(file, buf, count, pos); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_READ], [ + AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer]) + ZFS_LINUX_TEST_RESULT([kernel_read], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_READ_PPOS, 1, [kernel_read() take loff_t pointer]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_RW], [ + ZFS_AC_KERNEL_SRC_WRITE + ZFS_AC_KERNEL_SRC_READ +]) + +AC_DEFUN([ZFS_AC_KERNEL_RW], [ + ZFS_AC_KERNEL_WRITE + ZFS_AC_KERNEL_READ ]) diff --git a/config/kernel-rwsem.m4 b/config/kernel-rwsem.m4 index 532c2271818..67c5cf908a3 100644 --- a/config/kernel-rwsem.m4 +++ b/config/kernel-rwsem.m4 @@ -4,25 +4,26 @@ dnl # dnl # The 
rw_semaphore.wait_lock member was changed from spinlock_t to dnl # raw_spinlock_t at commit ddb6c9b58a19edcfac93ac670b066c836ff729f1. dnl # -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW], [ - AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW], [ + ZFS_LINUX_TEST_SRC([rwsem_spinlock_is_raw], [ #include ],[ struct rw_semaphore dummy_semaphore __attribute__ ((unused)); raw_spinlock_t dummy_lock __attribute__ ((unused)) = __RAW_SPIN_LOCK_INITIALIZER(dummy_lock); dummy_semaphore.wait_lock = dummy_lock; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW], [ + AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw]) + ZFS_LINUX_TEST_RESULT([rwsem_spinlock_is_raw], [ AC_MSG_RESULT(yes) AC_DEFINE(RWSEM_SPINLOCK_IS_RAW, 1, - [struct rw_semaphore member wait_lock is raw_spinlock_t]) + [struct rw_semaphore member wait_lock is raw_spinlock_t]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # @@ -30,23 +31,24 @@ dnl # 3.16 API Change dnl # dnl # rwsem-spinlock "->activity" changed to "->count" dnl # -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ACTIVITY], [ - AC_MSG_CHECKING([whether struct rw_semaphore has member activity]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY], [ + ZFS_LINUX_TEST_SRC([rwsem_activity], [ #include ],[ struct rw_semaphore dummy_semaphore __attribute__ ((unused)); dummy_semaphore.activity = 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ACTIVITY], [ + AC_MSG_CHECKING([whether struct rw_semaphore has member activity]) + ZFS_LINUX_TEST_RESULT([rwsem_activity], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1, - [struct rw_semaphore has member activity]) + [struct rw_semaphore has member activity]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # @@ -54,22 +56,35 @@ dnl # 4.8 API 
Change dnl # dnl # rwsem "->count" changed to atomic_long_t type dnl # -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [ - AC_MSG_CHECKING( - [whether struct rw_semaphore has atomic_long_t member count]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT], [ + ZFS_LINUX_TEST_SRC([rwsem_atomic_long_count], [ #include ],[ DECLARE_RWSEM(dummy_semaphore); (void) atomic_long_read(&dummy_semaphore.count); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [ + AC_MSG_CHECKING( + [whether struct rw_semaphore has atomic_long_t member count]) + ZFS_LINUX_TEST_RESULT([rwsem_atomic_long_count], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_RWSEM_ATOMIC_LONG_COUNT, 1, - [struct rw_semaphore has atomic_long_t member count]) + [struct rw_semaphore has atomic_long_t member count]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM], [ + ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW + ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY + ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM], [ + ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW + ZFS_AC_KERNEL_RWSEM_ACTIVITY + ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT ]) diff --git a/config/kernel-sched.m4 b/config/kernel-sched.m4 index 640b008aab8..4a7db970aef 100644 --- a/config/kernel-sched.m4 +++ b/config/kernel-sched.m4 @@ -2,14 +2,18 @@ dnl # dnl # 3.9 API change, dnl # Moved things from linux/sched.h to linux/sched/rt.h dnl # -AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], - [AC_MSG_CHECKING([whether header linux/sched/rt.h exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED_RT_HEADER], [ + ZFS_LINUX_TEST_SRC([sched_rt_header], [ #include #include ],[ return 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], [ + AC_MSG_CHECKING([whether header linux/sched/rt.h exists]) + ZFS_LINUX_TEST_RESULT([sched_rt_header], [ AC_DEFINE(HAVE_SCHED_RT_HEADER, 1, [linux/sched/rt.h exists]) 
AC_MSG_RESULT(yes) ],[ @@ -21,36 +25,59 @@ dnl # dnl # 4.11 API change, dnl # Moved things from linux/sched.h to linux/sched/signal.h dnl # -AC_DEFUN([ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER], - [AC_MSG_CHECKING([whether header linux/sched/signal.h exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER], [ + ZFS_LINUX_TEST_SRC([sched_signal_header], [ #include #include ],[ return 0; - ],[ - AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, [linux/sched/signal.h exists]) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER], [ + AC_MSG_CHECKING([whether header linux/sched/signal.h exists]) + ZFS_LINUX_TEST_RESULT([sched_signal_header], [ + AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, + [linux/sched/signal.h exists]) AC_MSG_RESULT(yes) ],[ AC_MSG_RESULT(no) ]) ]) + dnl # dnl # 3.19 API change dnl # The io_schedule_timeout() function is present in all 2.6.32 kernels dnl # but it was not exported until Linux 3.19. The RHEL 7.x kernels which dnl # are based on a 3.10 kernel do export this symbol. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT], [ - AC_MSG_CHECKING([whether io_schedule_timeout() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT], [ + ZFS_LINUX_TEST_SRC([io_schedule_timeout], [ #include ], [ (void) io_schedule_timeout(1); - ], [io_schedule_timeout], [], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT], [ + AC_MSG_CHECKING([whether io_schedule_timeout() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([io_schedule_timeout], + [io_schedule_timeout], [], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IO_SCHEDULE_TIMEOUT, 1, [yes]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED], [ + ZFS_AC_KERNEL_SRC_SCHED_RT_HEADER + ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER + ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT +]) + +AC_DEFUN([ZFS_AC_KERNEL_SCHED], [ + ZFS_AC_KERNEL_SCHED_RT_HEADER + ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER + ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT +]) diff --git a/config/kernel-security-inode-init.m4 b/config/kernel-security-inode-init.m4 index a62176d42b5..0dea7e3925b 100644 --- a/config/kernel-security-inode-init.m4 +++ b/config/kernel-security-inode-init.m4 @@ -5,9 +5,8 @@ dnl # qstr argument which must be passed in from the dentry if available. dnl # Passing a NULL is safe when no qstr is available the relevant dnl # security checks will just be skipped. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY], [ - AC_MSG_CHECKING([whether security_inode_init_security wants 6 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_6ARGS], [ + ZFS_LINUX_TEST_SRC([security_inode_init_security_6args], [ #include ],[ struct inode *ip __attribute__ ((unused)) = NULL; @@ -18,10 +17,15 @@ AC_DEFUN([ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY], [ size_t len __attribute__ ((unused)) = 0; security_inode_init_security(ip, dip, str, &name, &value, &len); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_6ARGS], [ + AC_MSG_CHECKING([whether security_inode_init_security wants 6 args]) + ZFS_LINUX_TEST_RESULT([security_inode_init_security_6args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY, 1, - [security_inode_init_security wants 6 args]) + [security_inode_init_security wants 6 args]) ],[ AC_MSG_RESULT(no) ]) @@ -34,9 +38,8 @@ dnl # a filesystem specific callback to write security extended attributes. dnl # This was done to support the initialization of multiple LSM xattrs dnl # and the EVM xattr. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY], [ - AC_MSG_CHECKING([whether security_inode_init_security wants callback]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_CALLBACK], [ + ZFS_LINUX_TEST_SRC([security_inode_init_security], [ #include ],[ struct inode *ip __attribute__ ((unused)) = NULL; @@ -45,11 +48,26 @@ AC_DEFUN([ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY], [ initxattrs func __attribute__ ((unused)) = NULL; security_inode_init_security(ip, dip, str, func, NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_CALLBACK], [ + AC_MSG_CHECKING([whether security_inode_init_security wants callback]) + ZFS_LINUX_TEST_RESULT([security_inode_init_security], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY, 1, - [security_inode_init_security wants callback]) + [security_inode_init_security wants callback]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE], [ + ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_6ARGS + ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_CALLBACK +]) + +AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE], [ + ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_6ARGS + ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_CALLBACK +]) diff --git a/config/kernel-set-nlink.m4 b/config/kernel-set-nlink.m4 index f7ffc0d3a5e..63a5a8c0dac 100644 --- a/config/kernel-set-nlink.m4 +++ b/config/kernel-set-nlink.m4 @@ -2,18 +2,21 @@ dnl # dnl # Linux v3.2-rc1 API change dnl # SHA: bfe8684869601dacfcb2cd69ef8cfd9045f62170 dnl # -AC_DEFUN([ZFS_AC_KERNEL_SET_NLINK], [ - AC_MSG_CHECKING([whether set_nlink() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_NLINK], [ + ZFS_LINUX_TEST_SRC([set_nlink], [ #include ],[ struct inode node; unsigned int link = 0; (void) set_nlink(&node, link); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SET_NLINK], [ + AC_MSG_CHECKING([whether set_nlink() is available]) + 
ZFS_LINUX_TEST_RESULT([set_nlink], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_NLINK, 1, - [set_nlink() is available]) + AC_DEFINE(HAVE_SET_NLINK, 1, [set_nlink() is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4 index 32f7deb77ab..45408c45c69 100644 --- a/config/kernel-setattr-prepare.m4 +++ b/config/kernel-setattr-prepare.m4 @@ -3,17 +3,21 @@ dnl # 4.9 API change dnl # The inode_change_ok() function has been renamed setattr_prepare() dnl # and updated to take a dentry rather than an inode. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], - [AC_MSG_CHECKING([whether setattr_prepare() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [ + ZFS_LINUX_TEST_SRC([setattr_prepare], [ #include ], [ struct dentry *dentry = NULL; struct iattr *attr = NULL; - int error; + int error __attribute__ ((unused)) = + setattr_prepare(dentry, attr); + ]) +]) - error = setattr_prepare(dentry, attr); - ], [setattr_prepare], [fs/attr.c], [ +AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [ + AC_MSG_CHECKING([whether setattr_prepare() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare], + [setattr_prepare], [fs/attr.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SETATTR_PREPARE, 1, [setattr_prepare() is available]) diff --git a/config/kernel-sget-args.m4 b/config/kernel-sget-args.m4 index 9d1745925f3..13581399ecc 100644 --- a/config/kernel-sget-args.m4 +++ b/config/kernel-sget-args.m4 @@ -2,9 +2,8 @@ dnl # dnl # 3.6 API change, dnl # 'sget' now takes the mount flags as an argument. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_5ARG_SGET], - [AC_MSG_CHECKING([whether sget() wants 5 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SGET], [ + ZFS_LINUX_TEST_SRC([sget_5args], [ #include ],[ struct file_system_type *type = NULL; @@ -13,11 +12,15 @@ AC_DEFUN([ZFS_AC_KERNEL_5ARG_SGET], int flags = 0; void *data = NULL; (void) sget(type, test, set, flags, data); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SGET], [ + AC_MSG_CHECKING([whether sget() wants 5 args]) + ZFS_LINUX_TEST_RESULT([sget_5args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_5ARG_SGET, 1, [sget() wants 5 args]) ],[ AC_MSG_RESULT(no) ]) ]) - diff --git a/config/kernel-show-options.m4 b/config/kernel-show-options.m4 index 67d683c55e3..9e426bc3917 100644 --- a/config/kernel-show-options.m4 +++ b/config/kernel-show-options.m4 @@ -1,21 +1,26 @@ dnl # dnl # Linux 3.3 API dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHOW_OPTIONS], [ - AC_MSG_CHECKING([whether sops->show_options() wants dentry]) - - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHOW_OPTIONS], [ + ZFS_LINUX_TEST_SRC([super_operations_show_options], [ #include - int show_options (struct seq_file * x, struct dentry * y) { return 0; }; + int show_options(struct seq_file * x, struct dentry * y) { + return 0; + }; + static struct super_operations sops __attribute__ ((unused)) = { .show_options = show_options, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHOW_OPTIONS], [ + AC_MSG_CHECKING([whether sops->show_options() wants dentry]) + ZFS_LINUX_TEST_RESULT([super_operations_show_options], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_SHOW_OPTIONS_WITH_DENTRY, 1, - [sops->show_options() with dentry]) + [sops->show_options() with dentry]) ],[ AC_MSG_RESULT([no]) ]) diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 405cbf42cf3..45b4b5d4b2c 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -4,9 +4,8 @@ dnl # The super_block structure now stores a per-filesystem shrinker. 
dnl # This interface is preferable because it can be used to specifically dnl # target only the zfs filesystem for pruning. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHRINK], [ - AC_MSG_CHECKING([whether super_block has s_shrink]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ + ZFS_LINUX_TEST_SRC([super_block_s_shrink], [ #include int shrink(struct shrinker *s, struct shrink_control *sc) @@ -18,8 +17,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK], [ .s_shrink.seeks = DEFAULT_SEEKS, .s_shrink.batch = 0, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ + AC_MSG_CHECKING([whether super_block has s_shrink]) + ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SHRINK, 1, [struct super_block has s_shrink]) @@ -50,15 +53,18 @@ dnl # a list_head is used. Then to prevent the spinning from occurring dnl # the .next pointer is set to the fs_supers list_head which ensures dnl # the iterate_supers_type() function will always terminate. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD], [ - AC_MSG_CHECKING([whether super_block has s_instances list_head]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_INSTANCES_LIST_HEAD], [ + ZFS_LINUX_TEST_SRC([super_block_s_instances_list_head], [ #include ],[ struct super_block sb __attribute__ ((unused)); - INIT_LIST_HEAD(&sb.s_instances); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_INSTANCES_LIST_HEAD], [ + AC_MSG_CHECKING([whether super_block has s_instances list_head]) + ZFS_LINUX_TEST_RESULT([super_block_s_instances_list_head], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_S_INSTANCES_LIST_HEAD, 1, [struct super_block has s_instances list_head]) @@ -67,9 +73,8 @@ AC_DEFUN([ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [ - AC_MSG_CHECKING([whether sops->nr_cached_objects() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_NR_CACHED_OBJECTS], [ + ZFS_LINUX_TEST_SRC([nr_cached_objects], [ #include int nr_cached_objects(struct super_block *sb) { return 0; } @@ -78,19 +83,22 @@ AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [ sops __attribute__ ((unused)) = { .nr_cached_objects = nr_cached_objects, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [ + AC_MSG_CHECKING([whether sops->nr_cached_objects() exists]) + ZFS_LINUX_TEST_RESULT([nr_cached_objects], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_NR_CACHED_OBJECTS, 1, - [sops->nr_cached_objects() exists]) + [sops->nr_cached_objects() exists]) ],[ AC_MSG_RESULT(no) ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [ - AC_MSG_CHECKING([whether sops->free_cached_objects() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FREE_CACHED_OBJECTS], [ + ZFS_LINUX_TEST_SRC([free_cached_objects], [ #include void free_cached_objects(struct super_block *sb, int x) @@ -100,11 +108,15 @@ AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [ sops __attribute__ ((unused)) = { .free_cached_objects = free_cached_objects, 
}; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [ + AC_MSG_CHECKING([whether sops->free_cached_objects() exists]) + ZFS_LINUX_TEST_RESULT([free_cached_objects], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FREE_CACHED_OBJECTS, 1, - [sops->free_cached_objects() exists]) + [sops->free_cached_objects() exists]) ],[ AC_MSG_RESULT(no) ]) @@ -115,15 +127,19 @@ dnl # 3.12 API change dnl # The nid member was added to struct shrink_control to support dnl # NUMA-aware shrinkers. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ - AC_MSG_CHECKING([whether shrink_control has nid]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID], [ + ZFS_LINUX_TEST_SRC([shrink_control_nid], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); unsigned long scnidsize __attribute__ ((unused)) = sizeof(sc.nid); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ + AC_MSG_CHECKING([whether shrink_control has nid]) + ZFS_LINUX_TEST_RESULT([shrink_control_nid], [ AC_MSG_RESULT(yes) AC_DEFINE(SHRINK_CONTROL_HAS_NID, 1, [struct shrink_control has nid]) @@ -132,84 +148,96 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ ]) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ + ZFS_LINUX_TEST_SRC([shrinker_cb_2arg], [ + #include + int shrinker_cb(int nr_to_scan, gfp_t gfp_mask) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .shrink = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + register_shrinker(&cache_shrinker); + ]) -AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - dnl # - dnl # 2.6.23 to 2.6.34 API change - dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask) - dnl # - AC_MSG_CHECKING([whether old 2-argument shrinker exists]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([shrinker_cb_3arg], [ #include + int shrinker_cb(struct shrinker *shrink, int nr_to_scan, + gfp_t gfp_mask) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .shrink = 
shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + register_shrinker(&cache_shrinker); + ]) - int shrinker_cb(int nr_to_scan, gfp_t gfp_mask) { - return 0; - } + ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ + #include + int shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker); + ]) + + ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ + #include + unsigned long shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } ],[ + struct shrinker cache_shrinker = { + .count_objects = shrinker_cb, + .scan_objects = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + register_shrinker(&cache_shrinker); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ + dnl # + dnl # 2.6.23 to 2.6.34 API change + dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask) + dnl # + AC_MSG_CHECKING([whether old 2-argument shrinker exists]) + ZFS_LINUX_TEST_RESULT([shrinker_cb_2arg], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_OLD_SHRINKER_CALLBACK, 1, - [old shrinker callback wants 2 args]) + [old shrinker callback wants 2 args]) ],[ AC_MSG_RESULT(no) + dnl # dnl # 2.6.35 - 2.6.39 API change dnl # ->shrink(struct shrinker *, dnl # int nr_to_scan, gfp_t gfp_mask) dnl # AC_MSG_CHECKING([whether old 3-argument shrinker exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int shrinker_cb(struct shrinker *shrink, int nr_to_scan, - gfp_t gfp_mask) { - return 0; - } - ],[ - struct shrinker cache_shrinker = { - .shrink = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ],[ + ZFS_LINUX_TEST_RESULT([shrinker_cb_3arg], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_3ARGS_SHRINKER_CALLBACK, 1, [old shrinker callback wants 3 args]) ],[ AC_MSG_RESULT(no) + dnl # dnl # 3.0 - 3.11 API change dnl # ->shrink(struct shrinker *, dnl # struct shrink_control *sc) dnl # AC_MSG_CHECKING( - [whether new 2-argument shrinker exists]) 
- ZFS_LINUX_TRY_COMPILE([ - #include - - int shrinker_cb(struct shrinker *shrink, - struct shrink_control *sc) { - return 0; - } - ],[ - struct shrinker cache_shrinker = { - .shrink = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ],[ + [whether new 2-argument shrinker exists]) + ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_NEW_SHRINKER_CALLBACK, 1, [new shrinker callback wants 2 args]) ],[ AC_MSG_RESULT(no) + dnl # dnl # 3.12 API change, dnl # ->shrink() is logically split in to @@ -217,52 +245,61 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # AC_MSG_CHECKING( [whether ->count_objects callback exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - - unsigned long shrinker_cb( - struct shrinker *shrink, - struct shrink_control *sc) { - return 0; - } - ],[ - struct shrinker cache_shrinker = { - .count_objects = shrinker_cb, - .scan_objects = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ],[ + ZFS_LINUX_TEST_RESULT( + [shrinker_cb_shrink_control_split], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, [->count_objects exists]) ],[ - AC_MSG_ERROR(error) + ZFS_LINUX_TEST_ERROR([shrinker]) ]) ]) ]) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # dnl # 2.6.39 API change, dnl # Shrinker adjust to use common shrink_control structure. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ - AC_MSG_CHECKING([whether struct shrink_control exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT], [ + ZFS_LINUX_TEST_SRC([shrink_control_struct], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); sc.nr_to_scan = 0; sc.gfp_mask = GFP_KERNEL; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ + AC_MSG_CHECKING([whether struct shrink_control exists]) + ZFS_LINUX_TEST_RESULT([shrink_control_struct], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1, - [struct shrink_control exists]) + [struct shrink_control exists]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_INSTANCES_LIST_HEAD + ZFS_AC_KERNEL_SRC_NR_CACHED_OBJECTS + ZFS_AC_KERNEL_SRC_FREE_CACHED_OBJECTS + ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID + ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK + ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ + ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK + ZFS_AC_KERNEL_SUPER_BLOCK_S_INSTANCES_LIST_HEAD + ZFS_AC_KERNEL_NR_CACHED_OBJECTS + ZFS_AC_KERNEL_FREE_CACHED_OBJECTS + ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID + ZFS_AC_KERNEL_SHRINKER_CALLBACK + ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT +]) diff --git a/config/kernel-submit_bio.m4 b/config/kernel-submit_bio.m4 index da5f85ca72c..cf80e9b83e3 100644 --- a/config/kernel-submit_bio.m4 +++ b/config/kernel-submit_bio.m4 @@ -3,15 +3,19 @@ dnl # 4.8 API change dnl # The rw argument has been removed from submit_bio/submit_bio_wait. dnl # Callers are now expected to set bio->bi_rw instead of passing it in. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_SUBMIT_BIO], [ - AC_MSG_CHECKING([whether submit_bio() wants 1 arg]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUBMIT_BIO], [ + ZFS_LINUX_TEST_SRC([submit_bio], [ #include ],[ blk_qc_t blk_qc; struct bio *bio = NULL; blk_qc = submit_bio(bio); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUBMIT_BIO], [ + AC_MSG_CHECKING([whether submit_bio() wants 1 arg]) + ZFS_LINUX_TEST_RESULT([submit_bio], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_SUBMIT_BIO, 1, [submit_bio() wants 1 arg]) ],[ diff --git a/config/kernel-super-userns.m4 b/config/kernel-super-userns.m4 index de94ad967ac..1ad35f2d19b 100644 --- a/config/kernel-super-userns.m4 +++ b/config/kernel-super-userns.m4 @@ -3,15 +3,19 @@ dnl # 4.8 API change dnl # struct user_namespace was added to struct super_block as dnl # super->s_user_ns member dnl # -AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [ - AC_MSG_CHECKING([whether super_block->s_user_ns exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_USER_NS], [ + ZFS_LINUX_TEST_SRC([super_user_ns], [ #include #include - ],[ + ], [ struct super_block super; super.s_user_ns = (struct user_namespace *)NULL; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [ + AC_MSG_CHECKING([whether super_block->s_user_ns exists]) + ZFS_LINUX_TEST_RESULT([super_user_ns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_USER_NS, 1, [super_block->s_user_ns exists]) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index d9064204307..403cff3f418 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -8,13 +8,9 @@ dnl # kernels that support the new timer_list.func signature. dnl # dnl # Also check for the existence of flags in struct timer_list, they were dnl # added in 4.1-rc8 via 0eeda71bc30d. 
- -AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ - AC_MSG_CHECKING([whether timer_setup() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - - ZFS_LINUX_TRY_COMPILE([ +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ + ZFS_LINUX_TEST_SRC([timer_setup], [ #include struct my_task_timer { @@ -24,13 +20,34 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ void task_expire(struct timer_list *tl) { - struct my_task_timer *task_timer = from_timer(task_timer, tl, timer); + struct my_task_timer *task_timer = + from_timer(task_timer, tl, timer); task_timer->data = 42; } ],[ struct my_task_timer task_timer; timer_setup(&task_timer.timer, task_expire, 0); + ]) + + ZFS_LINUX_TEST_SRC([timer_list_function], [ + #include + void task_expire(struct timer_list *tl) {} ],[ + struct timer_list tl; + tl.function = task_expire; + ]) + + ZFS_LINUX_TEST_SRC([timer_list_flags], [ + #include + ],[ + struct timer_list tl; + tl.flags = 2; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ + AC_MSG_CHECKING([whether timer_setup() is available]) + ZFS_LINUX_TEST_RESULT([timer_setup], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_SETUP, 1, [timer_setup() is available]) @@ -39,14 +56,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ ]) AC_MSG_CHECKING([whether timer function expects timer_list]) - - ZFS_LINUX_TRY_COMPILE([ - #include - void task_expire(struct timer_list *tl) {} - ],[ - struct timer_list tl; - tl.function = task_expire; - ],[ + ZFS_LINUX_TEST_RESULT([timer_list_function], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1, [timer_list.function gets a timer_list]) @@ -55,19 +65,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ ]) AC_MSG_CHECKING([whether struct timer_list has flags]) - - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct timer_list tl; - tl.flags = 2; - ],[ + ZFS_LINUX_TEST_RESULT([timer_list_flags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_LIST_FLAGS, 1, [struct timer_list has a flags member]) ],[ AC_MSG_RESULT(no) ]) - - 
EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 index 5aad90450e8..f510bfe6ba0 100644 --- a/config/kernel-tmpfile.m4 +++ b/config/kernel-tmpfile.m4 @@ -2,9 +2,8 @@ dnl # dnl # 3.11 API change dnl # Add support for i_op->tmpfile dnl # -AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ - AC_MSG_CHECKING([whether i_op->tmpfile() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ + ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [ #include int tmpfile(struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } @@ -12,11 +11,14 @@ AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ iops __attribute__ ((unused)) = { .tmpfile = tmpfile, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ + AC_MSG_CHECKING([whether i_op->tmpfile() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, - [i_op->tmpfile() exists]) + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-totalhigh_pages.m4 b/config/kernel-totalhigh_pages.m4 index b22e86d4dbc..4ecb03a50a5 100644 --- a/config/kernel-totalhigh_pages.m4 +++ b/config/kernel-totalhigh_pages.m4 @@ -1,16 +1,18 @@ dnl # dnl # 5.0 API change dnl # -dnl # ca79b0c211af mm: convert totalram_pages and totalhigh_pages variables to atomic -dnl # -AC_DEFUN([ZFS_AC_KERNEL_TOTALHIGH_PAGES], [ - AC_MSG_CHECKING([whether totalhigh_pages() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES], [ + ZFS_LINUX_TEST_SRC([totalhigh_pages], [ #include ],[ unsigned long pages __attribute__ ((unused)); pages = totalhigh_pages(); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TOTALHIGH_PAGES], [ + AC_MSG_CHECKING([whether totalhigh_pages() exists]) + ZFS_LINUX_TEST_RESULT([totalhigh_pages], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_TOTALHIGH_PAGES, 1, [totalhigh_pages() exists]) ],[ diff --git a/config/kernel-totalram-pages-func.m4 b/config/kernel-totalram-pages-func.m4 
index a6eac645431..d0e812a8d2d 100644 --- a/config/kernel-totalram-pages-func.m4 +++ b/config/kernel-totalram-pages-func.m4 @@ -2,16 +2,21 @@ dnl # dnl # Linux 5.0: totalram_pages is no longer a global variable, and must be dnl # read via the totalram_pages() helper function. dnl # -AC_DEFUN([ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC], [ - AC_MSG_CHECKING([whether totalram_pages() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC], [ + ZFS_LINUX_TEST_SRC([totalram_pages], [ #include ],[ unsigned long pages __attribute__ ((unused)); pages = totalram_pages(); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC], [ + AC_MSG_CHECKING([whether totalram_pages() exists]) + ZFS_LINUX_TEST_RESULT([totalram_pages], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TOTALRAM_PAGES_FUNC, 1, [kernel has totalram_pages()]) + AC_DEFINE(HAVE_TOTALRAM_PAGES_FUNC, 1, + [kernel has totalram_pages()]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-truncate-range.m4 b/config/kernel-truncate-range.m4 index da2cb50fcbc..8fdbb10869b 100644 --- a/config/kernel-truncate-range.m4 +++ b/config/kernel-truncate-range.m4 @@ -4,17 +4,20 @@ dnl # torvalds/linux@17cf28afea2a1112f240a3a2da8af883be024811 removed dnl # truncate_range(). 
The file hole punching functionality is now dnl # provided by fallocate() dnl # -AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_RANGE], [ - AC_MSG_CHECKING([whether iops->truncate_range() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TRUNCATE_RANGE], [ + ZFS_LINUX_TEST_SRC([inode_operations_truncate_range], [ #include void truncate_range(struct inode *inode, loff_t start, loff_t end) { return; } static struct inode_operations iops __attribute__ ((unused)) = { .truncate_range = truncate_range, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_RANGE], [ + AC_MSG_CHECKING([whether iops->truncate_range() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_truncate_range], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_TRUNCATE_RANGE, 1, [iops->truncate_range() exists]) diff --git a/config/kernel-truncate-setsize.m4 b/config/kernel-truncate-setsize.m4 index 7e4aff479a9..e719c1444ab 100644 --- a/config/kernel-truncate-setsize.m4 +++ b/config/kernel-truncate-setsize.m4 @@ -2,16 +2,21 @@ dnl # dnl # 2.6.35 API change dnl # Added truncate_setsize() helper function. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_SETSIZE], - [AC_MSG_CHECKING([whether truncate_setsize() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE], [ + ZFS_LINUX_TEST_SRC([truncate_setsize], [ #include ], [ truncate_setsize(NULL, 0); - ], [truncate_setsize], [mm/truncate.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_SETSIZE], [ + AC_MSG_CHECKING([whether truncate_setsize() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([truncate_setsize], + [truncate_setsize], [mm/truncate.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_TRUNCATE_SETSIZE, 1, - [truncate_setsize() is available]) + [truncate_setsize() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-userns-capabilities.m4 b/config/kernel-userns-capabilities.m4 index fa3381978bf..5dcbc03d3b1 100644 --- a/config/kernel-userns-capabilities.m4 +++ b/config/kernel-userns-capabilities.m4 @@ -2,16 +2,19 @@ dnl # dnl # 2.6.38 API change dnl # ns_capable() was introduced dnl # -AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ - AC_MSG_CHECKING([whether ns_capable exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_NS_CAPABLE], [ + ZFS_LINUX_TEST_SRC([ns_capable], [ #include ],[ ns_capable((struct user_namespace *)NULL, CAP_SYS_ADMIN); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ + AC_MSG_CHECKING([whether ns_capable exists]) + ZFS_LINUX_TEST_RESULT([ns_capable], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_NS_CAPABLE, 1, - [ns_capable exists]) + AC_DEFINE(HAVE_NS_CAPABLE, 1, [ns_capable exists]) ],[ AC_MSG_RESULT(no) ]) @@ -23,17 +26,20 @@ dnl # struct user_namespace was added to struct cred_t as dnl # cred->user_ns member dnl # Note that current_user_ns() was added in 2.6.28. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CRED_USER_NS], [ - AC_MSG_CHECKING([whether cred_t->user_ns exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CRED_USER_NS], [ + ZFS_LINUX_TEST_SRC([cred_user_ns], [ #include ],[ struct cred cr; cr.user_ns = (struct user_namespace *)NULL; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CRED_USER_NS], [ + AC_MSG_CHECKING([whether cred_t->user_ns exists]) + ZFS_LINUX_TEST_RESULT([cred_user_ns], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CRED_USER_NS, 1, - [cred_t->user_ns exists]) + AC_DEFINE(HAVE_CRED_USER_NS, 1, [cred_t->user_ns exists]) ],[ AC_MSG_RESULT(no) ]) @@ -44,14 +50,18 @@ dnl # 3.4 API change dnl # kuid_has_mapping() and kgid_has_mapping() were added to distinguish dnl # between internal kernel uids/gids and user namespace uids/gids. dnl # -AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ - AC_MSG_CHECKING([whether kuid_has_mapping/kgid_has_mapping exist]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING], [ + ZFS_LINUX_TEST_SRC([kuid_has_mapping], [ #include ],[ kuid_has_mapping((struct user_namespace *)NULL, KUIDT_INIT(0)); kgid_has_mapping((struct user_namespace *)NULL, KGIDT_INIT(0)); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ + AC_MSG_CHECKING([whether kuid_has_mapping/kgid_has_mapping exist]) + ZFS_LINUX_TEST_RESULT([kuid_has_mapping], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KUID_HAS_MAPPING, 1, [kuid_has_mapping/kgid_has_mapping exist]) @@ -60,6 +70,12 @@ AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ ]) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES], [ + ZFS_AC_KERNEL_SRC_NS_CAPABLE + ZFS_AC_KERNEL_SRC_CRED_USER_NS + ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING +]) + AC_DEFUN([ZFS_AC_KERNEL_USERNS_CAPABILITIES], [ ZFS_AC_KERNEL_NS_CAPABLE ZFS_AC_KERNEL_CRED_USER_NS diff --git a/config/kernel-urange-sleep.m4 b/config/kernel-usleep_range.m4 similarity index 60% rename from config/kernel-urange-sleep.m4 rename to config/kernel-usleep_range.m4 index b5764de3ed6..5bf051ab4fa 100644 --- 
a/config/kernel-urange-sleep.m4 +++ b/config/kernel-usleep_range.m4 @@ -1,20 +1,23 @@ dnl # -dnl # 2.6.36 API compatibility. -dnl # Added usleep_range timer. +dnl # 2.6.36 API compatibility- Added usleep_range timer. +dnl # dnl # usleep_range is a finer precision implementation of msleep dnl # designed to be a drop-in replacement for udelay where a precise dnl # sleep / busy-wait is unnecessary. dnl # -AC_DEFUN([ZFS_AC_KERNEL_USLEEP_RANGE], [ - AC_MSG_CHECKING([whether usleep_range() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_USLEEP_RANGE], [ + ZFS_LINUX_TEST_SRC([usleep_range], [ #include ],[ usleep_range(0, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_USLEEP_RANGE], [ + AC_MSG_CHECKING([whether usleep_range() is available]) + ZFS_LINUX_TEST_RESULT([usleep_range], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_USLEEP_RANGE, 1, - [usleep_range is available]) + AC_DEFINE(HAVE_USLEEP_RANGE, 1, [usleep_range is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-vfs-direct_IO.m4 b/config/kernel-vfs-direct_IO.m4 index cc50bfbe4e7..82583d52fcb 100644 --- a/config/kernel-vfs-direct_IO.m4 +++ b/config/kernel-vfs-direct_IO.m4 @@ -1,9 +1,8 @@ dnl # -dnl # Linux 4.6.x API change +dnl # Check for direct IO interfaces. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER], [ - AC_MSG_CHECKING([whether aops->direct_IO() uses iov_iter]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ + ZFS_LINUX_TEST_SRC([direct_io_iter], [ #include ssize_t test_direct_IO(struct kiocb *kiocb, @@ -13,24 +12,9 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER, 1, - [aops->direct_IO() uses iov_iter without rw]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) -]) + ],[]) -dnl # -dnl # Linux 4.1.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_OFFSET], [ - AC_MSG_CHECKING( - [whether aops->direct_IO() uses iov_iter with offset]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([direct_io_iter_offset], [ #include ssize_t test_direct_IO(struct kiocb *kiocb, @@ -40,24 +24,9 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_OFFSET], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_OFFSET, 1, - [aops->direct_IO() uses iov_iter with offset]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) -]) + ],[]) -dnl # -dnl # Linux 3.16.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_RW_OFFSET], [ - AC_MSG_CHECKING( - [whether aops->direct_IO() uses iov_iter with rw and offset]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([direct_io_iter_rw_offset], [ #include ssize_t test_direct_IO(int rw, struct kiocb *kiocb, @@ -67,23 +36,9 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_RW_OFFSET], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET, 1, - [aops->direct_IO() uses iov_iter with rw and offset]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) -]) + ],[]) -dnl # -dnl # Ancient Linux API (predates git) -dnl # 
-AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_IOVEC], [ - AC_MSG_CHECKING([whether aops->direct_IO() uses iovec]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([direct_io_iovec], [ #include ssize_t test_direct_IO(int rw, struct kiocb *kiocb, @@ -94,37 +49,61 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_IOVEC], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1, - [aops->direct_IO() uses iovec]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) + ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ - zfs_ac_direct_io="no" - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER - fi - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_OFFSET - fi - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_RW_OFFSET - fi - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_IOVEC - fi + dnl # + dnl # Linux 4.6.x API change + dnl # + AC_MSG_CHECKING([whether aops->direct_IO() uses iov_iter]) + ZFS_LINUX_TEST_RESULT([direct_io_iter], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER, 1, + [aops->direct_IO() uses iov_iter without rw]) + ],[ + AC_MSG_RESULT([no]) - if test "$zfs_ac_direct_io" = "no"; then - AC_MSG_ERROR([no; unknown direct IO interface]) - fi + dnl # + dnl # Linux 4.1.x API change + dnl # + AC_MSG_CHECKING( + [whether aops->direct_IO() uses offset]) + ZFS_LINUX_TEST_RESULT([direct_io_iter_offset], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_OFFSET, 1, + [aops->direct_IO() uses iov_iter with offset]) + + ],[ + AC_MSG_RESULT([no]) + + dnl # + dnl # Linux 3.16.x API change + dnl # + AC_MSG_CHECKING( + [whether aops->direct_IO() uses rw and offset]) + ZFS_LINUX_TEST_RESULT([direct_io_iter_rw_offset], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET, 1, + [aops->direct_IO() uses iov_iter with ] + [rw and offset]) + ],[ + 
AC_MSG_RESULT([no]) + + dnl # + dnl # Ancient Linux API (predates git) + dnl # + AC_MSG_CHECKING( + [whether aops->direct_IO() uses iovec]) + ZFS_LINUX_TEST_RESULT([direct_io_iovec], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1, + [aops->direct_IO() uses iovec]) + ],[ + ZFS_LINUX_TEST_ERROR([direct IO]) + AC_MSG_RESULT([no]) + ]) + ]) + ]) + ]) ]) diff --git a/config/kernel-vfs-fsync.m4 b/config/kernel-vfs-fsync.m4 index a474f9f1745..18a60d29aae 100644 --- a/config/kernel-vfs-fsync.m4 +++ b/config/kernel-vfs-fsync.m4 @@ -2,13 +2,17 @@ dnl # dnl # 2.6.35 API change, dnl # Unused 'struct dentry *' removed from vfs_fsync() prototype. dnl # -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_VFS_FSYNC], [ - AC_MSG_CHECKING([whether vfs_fsync() wants 2 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_fsync_2args], [ #include ],[ vfs_fsync(NULL, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_FSYNC_2ARGS], [ + AC_MSG_CHECKING([whether vfs_fsync() wants 2 args]) + ZFS_LINUX_TEST_RESULT([vfs_fsync_2args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_VFS_FSYNC, 1, [vfs_fsync() wants 2 args]) ],[ diff --git a/config/kernel-vfs-getattr.m4 b/config/kernel-vfs-getattr.m4 index b13723538f1..eb07853cc4b 100644 --- a/config/kernel-vfs-getattr.m4 +++ b/config/kernel-vfs-getattr.m4 @@ -2,19 +2,23 @@ dnl # dnl # 4.11 API, a528d35e@torvalds/linux dnl # vfs_getattr(const struct path *p, struct kstat *s, u32 m, unsigned int f) dnl # -AC_DEFUN([ZFS_AC_KERNEL_4ARGS_VFS_GETATTR], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 4 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_getattr_4args], [ #include ],[ vfs_getattr((const struct path *)NULL, (struct kstat *)NULL, (u32)0, (unsigned int)0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_4ARGS], [ + AC_MSG_CHECKING([whether vfs_getattr() wants 4 args]) + ZFS_LINUX_TEST_RESULT([vfs_getattr_4args], [ 
AC_MSG_RESULT(yes) AC_DEFINE(HAVE_4ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 4 args]) + [vfs_getattr wants 4 args]) ],[ AC_MSG_RESULT(no) ]) @@ -24,17 +28,21 @@ dnl # dnl # 3.9 API dnl # vfs_getattr(struct path *p, struct kstat *s) dnl # -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_VFS_GETATTR], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 2 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_getattr_2args], [ #include ],[ vfs_getattr((struct path *) NULL, (struct kstat *)NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_2ARGS], [ + AC_MSG_CHECKING([whether vfs_getattr() wants 2 args]) + ZFS_LINUX_TEST_RESULT([vfs_getattr_2args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 2 args]) + [vfs_getattr wants 2 args]) ],[ AC_MSG_RESULT(no) ]) @@ -44,19 +52,35 @@ dnl # dnl # <3.9 API dnl # vfs_getattr(struct vfsmount *v, struct dentry *d, struct kstat *k) dnl # -AC_DEFUN([ZFS_AC_KERNEL_3ARGS_VFS_GETATTR], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 3 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_getattr_3args], [ #include ],[ vfs_getattr((struct vfsmount *)NULL, (struct dentry *)NULL, (struct kstat *)NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_3ARGS], [ + AC_MSG_CHECKING([whether vfs_getattr() wants 3 args]) + ZFS_LINUX_TEST_RESULT([vfs_getattr_3args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_3ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 3 args]) + [vfs_getattr wants 3 args]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR], [ + ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS + ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS + ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR], [ + ZFS_AC_KERNEL_VFS_GETATTR_4ARGS + ZFS_AC_KERNEL_VFS_GETATTR_2ARGS + ZFS_AC_KERNEL_VFS_GETATTR_3ARGS +]) diff --git a/config/kernel-vfs-iterate.m4 b/config/kernel-vfs-iterate.m4 index 
5de901d4462..172118eac87 100644 --- a/config/kernel-vfs-iterate.m4 +++ b/config/kernel-vfs-iterate.m4 @@ -1,9 +1,5 @@ -AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ - dnl # - dnl # 4.7 API change - dnl # - AC_MSG_CHECKING([whether fops->iterate_shared() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ + ZFS_LINUX_TEST_SRC([file_operations_iterate_shared], [ #include int iterate(struct file *filp, struct dir_context * context) { return 0; } @@ -12,11 +8,44 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ __attribute__ ((unused)) = { .iterate_shared = iterate, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([file_operations_iterate], [ + #include + int iterate(struct file *filp, + struct dir_context *context) { return 0; } + + static const struct file_operations fops + __attribute__ ((unused)) = { + .iterate = iterate, + }; + + #if defined(FMODE_KABI_ITERATE) + #error "RHEL 7.5, FMODE_KABI_ITERATE interface" + #endif + ],[]) + + ZFS_LINUX_TEST_SRC([file_operations_readdir], [ + #include + int readdir(struct file *filp, void *entry, + filldir_t func) { return 0; } + + static const struct file_operations fops + __attribute__ ((unused)) = { + .readdir = readdir, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ + dnl # + dnl # 4.7 API change + dnl # + AC_MSG_CHECKING([whether fops->iterate_shared() is available]) + ZFS_LINUX_TEST_RESULT([file_operations_iterate_shared], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_VFS_ITERATE_SHARED, 1, - [fops->iterate_shared() is available]) + [fops->iterate_shared() is available]) ],[ AC_MSG_RESULT(no) @@ -31,44 +60,23 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ dnl # to using fops.readdir() to retain KABI compatibility. 
dnl # AC_MSG_CHECKING([whether fops->iterate() is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - int iterate(struct file *filp, - struct dir_context *context) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .iterate = iterate, - }; - - #if defined(FMODE_KABI_ITERATE) - #error "RHEL 7.5, FMODE_KABI_ITERATE interface" - #endif - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([file_operations_iterate], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_VFS_ITERATE, 1, - [fops->iterate() is available]) + [fops->iterate() is available]) ],[ AC_MSG_RESULT(no) + dnl # + dnl # readdir interface introduced + dnl # AC_MSG_CHECKING([whether fops->readdir() is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - int readdir(struct file *filp, void *entry, - filldir_t func) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .readdir = readdir, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([file_operations_readdir], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_VFS_READDIR, 1, - [fops->readdir() is available]) + [fops->readdir() is available]) ],[ - AC_MSG_ERROR(no; file a bug report with ZoL) + ZFS_LINUX_TEST_ERROR([vfs_iterate]) ]) ]) ]) diff --git a/config/kernel-vfs-rw-iterate.m4 b/config/kernel-vfs-rw-iterate.m4 index ace54f70711..000353ec15b 100644 --- a/config/kernel-vfs-rw-iterate.m4 +++ b/config/kernel-vfs-rw-iterate.m4 @@ -1,9 +1,8 @@ dnl # dnl # Linux 3.16 API dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], - [AC_MSG_CHECKING([whether fops->read/write_iter() are available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE], [ + ZFS_LINUX_TEST_SRC([file_operations_rw], [ #include ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) @@ -16,39 +15,41 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], .read_iter = test_read, .write_iter = test_write, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_RW_ITERATE, 1, - [fops->read/write_iter() are available]) + ],[]) - ZFS_AC_KERNEL_NEW_SYNC_READ + 
ZFS_LINUX_TEST_SRC([new_sync_rw], [ + #include ],[ - AC_MSG_RESULT(no) + ssize_t ret __attribute__ ((unused)); + struct file *filp = NULL; + char __user *rbuf = NULL; + const char __user *wbuf = NULL; + size_t len = 0; + loff_t ppos; + + ret = new_sync_read(filp, rbuf, len, &ppos); + ret = new_sync_write(filp, wbuf, len, &ppos); ]) ]) -dnl # -dnl # Linux 4.1 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_NEW_SYNC_READ], - [AC_MSG_CHECKING([whether new_sync_read/write() are available]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - ssize_t ret __attribute__ ((unused)); - struct file *filp = NULL; - char __user *rbuf = NULL; - const char __user *wbuf = NULL; - size_t len = 0; - loff_t ppos; - - ret = new_sync_read(filp, rbuf, len, &ppos); - ret = new_sync_write(filp, wbuf, len, &ppos); - ],[ +AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], [ + AC_MSG_CHECKING([whether fops->read/write_iter() are available]) + ZFS_LINUX_TEST_RESULT([file_operations_rw], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_NEW_SYNC_READ, 1, - [new_sync_read()/new_sync_write() are available]) + AC_DEFINE(HAVE_VFS_RW_ITERATE, 1, + [fops->read/write_iter() are available]) + + dnl # + dnl # Linux 4.1 API + dnl # + AC_MSG_CHECKING([whether new_sync_read/write() are available]) + ZFS_LINUX_TEST_RESULT([new_sync_rw], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_NEW_SYNC_READ, 1, + [new_sync_read()/new_sync_write() are available]) + ],[ + AC_MSG_RESULT(no) + ]) ],[ AC_MSG_RESULT(no) ]) @@ -57,19 +58,22 @@ AC_DEFUN([ZFS_AC_KERNEL_NEW_SYNC_READ], dnl # dnl # Linux 4.1.x API dnl # -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_WRITE_CHECKS], - [AC_MSG_CHECKING([whether generic_write_checks() takes kiocb]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS], [ + ZFS_LINUX_TEST_SRC([generic_write_checks], [ #include - ],[ struct kiocb *iocb = NULL; struct iov_iter *iov = NULL; generic_write_checks(iocb, iov); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS], [ + AC_MSG_CHECKING([whether 
generic_write_checks() takes kiocb]) + ZFS_LINUX_TEST_RESULT([generic_write_checks], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GENERIC_WRITE_CHECKS_KIOCB, 1, - [generic_write_checks() takes kiocb]) + [generic_write_checks() takes kiocb]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-wait.m4 b/config/kernel-wait.m4 index d6442c1df6f..0414242bf6d 100644 --- a/config/kernel-wait.m4 +++ b/config/kernel-wait.m4 @@ -1,3 +1,26 @@ +dnl # +dnl # 4.13 API change +dnl # Renamed struct wait_queue -> struct wait_queue_entry. +dnl # +dnl # N.B. The type check is performed before all other checks +dnl # since ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY depends on +dnl # HAVE_WAIT_QUEUE_ENTRY_T being set in confdefs.h. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T], [ + AC_MSG_CHECKING([whether wait_queue_entry_t exists]) + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + wait_queue_entry_t *entry __attribute__ ((unused)); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1, + [wait_queue_entry_t exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + dnl # dnl # 3.17 API change, dnl # wait_on_bit() no longer requires an action argument. The former @@ -8,34 +31,20 @@ dnl # of just two functions: one which uses io_schedule() and one which just dnl # uses schedule(). This API change was made to consolidate all of those dnl # redundant wait functions. dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_ON_BIT], [ - AC_MSG_CHECKING([whether wait_on_bit() takes an action]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_ON_BIT], [ + ZFS_LINUX_TEST_SRC([wait_on_bit], [ #include ],[ int (*action)(void *) = NULL; wait_on_bit(NULL, 0, action, 0); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes]) - ],[ - AC_MSG_RESULT(no) ]) ]) -dnl # -dnl # 4.13 API change -dnl # Renamed struct wait_queue -> struct wait_queue_entry. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T], [ - AC_MSG_CHECKING([whether wait_queue_entry_t exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - wait_queue_entry_t *entry __attribute__ ((unused)); - ],[ + +AC_DEFUN([ZFS_AC_KERNEL_WAIT_ON_BIT], [ + AC_MSG_CHECKING([whether wait_on_bit() takes an action]) + ZFS_LINUX_TEST_RESULT([wait_on_bit], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1, - [wait_queue_entry_t exists]) + AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes]) ],[ AC_MSG_RESULT(no) ]) @@ -46,9 +55,8 @@ dnl # 4.13 API change dnl # Renamed wait_queue_head::task_list -> wait_queue_head::head dnl # Renamed wait_queue_entry::task_list -> wait_queue_entry::entry dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ - AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY], [ + ZFS_LINUX_TEST_SRC([wait_queue_head_entry], [ #include #ifdef HAVE_WAIT_QUEUE_ENTRY_T @@ -66,7 +74,12 @@ AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ head = &wq_head.head; entry = &wq_entry.entry; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ + AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist]) + ZFS_LINUX_TEST_RESULT([wait_queue_head_entry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_WAIT_QUEUE_HEAD_ENTRY, 1, [wq_head->head and wq_entry->entry exist]) @@ -74,3 +87,13 @@ AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT], [ + ZFS_AC_KERNEL_SRC_WAIT_ON_BIT + ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY +]) + +AC_DEFUN([ZFS_AC_KERNEL_WAIT], [ + ZFS_AC_KERNEL_WAIT_ON_BIT + ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY +]) diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4 index 0b61b85b1d4..ed84c63902f 100644 --- a/config/kernel-xattr-handler.m4 +++ b/config/kernel-xattr-handler.m4 @@ -3,9 +3,8 @@ dnl # 2.6.35 API change, dnl # The 'struct xattr_handler' was constified in the 
generic dnl # super_block structure. dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ - AC_MSG_CHECKING([whether super_block uses const struct xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONST_XATTR_HANDLER], [ + ZFS_LINUX_TEST_SRC([const_xattr_handler], [ #include #include @@ -22,11 +21,15 @@ AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ const struct super_block sb __attribute__ ((unused)) = { .s_xattr = xattr_handlers, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ + AC_MSG_CHECKING([whether super_block uses const struct xattr_handler]) + ZFS_LINUX_TEST_RESULT([const_xattr_handler], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_CONST_XATTR_HANDLER, 1, - [super_block uses const struct xattr_handler]) + [super_block uses const struct xattr_handler]) ],[ AC_MSG_RESULT([no]) ]) @@ -38,17 +41,20 @@ dnl # struct xattr_handler added new member "name". dnl # xattr_handler which matches to whole name rather than prefix should use dnl # "name" instead of "prefix", e.g. "system.posix_acl_access" dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ - AC_MSG_CHECKING([whether xattr_handler has name]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME], [ + ZFS_LINUX_TEST_SRC([xattr_handler_name], [ #include static const struct xattr_handler xops __attribute__ ((unused)) = { .name = XATTR_NAME_POSIX_ACL_ACCESS, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ + AC_MSG_CHECKING([whether xattr_handler has name]) + ZFS_LINUX_TEST_RESULT([xattr_handler_name], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_HANDLER_NAME, 1, [xattr_handler has name]) @@ -58,52 +64,65 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ ]) dnl # -dnl # 4.9 API change, -dnl # iops->{set,get,remove}xattr and generic_{set,get,remove}xattr are -dnl # removed. xattr operations will directly go through sb->s_xattr. +dnl # Supported xattr handler get() interfaces checked newest to oldest. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_HAVE_GENERIC_SETXATTR], [ - AC_MSG_CHECKING([whether generic_setxattr() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ + ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode], [ #include - static const struct inode_operations - iops __attribute__ ((unused)) = { - .setxattr = generic_setxattr + int get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .get = get, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_SETXATTR, 1, - [generic_setxattr() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) + ],[]) -dnl # -dnl # Supported xattr handler get() interfaces checked newest to oldest. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ - dnl # - dnl # 4.7 API change, - dnl # The xattr_handler->get() callback was changed to take both - dnl # dentry and inode. 
- dnl # - AC_MSG_CHECKING([whether xattr_handler->get() wants both dentry and inode]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([xattr_handler_get_xattr_handler], [ #include int get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) { return 0; } + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return 0; } static const struct xattr_handler xops __attribute__ ((unused)) = { .get = get, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry], [ + #include + + int get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int handler_flags) + { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .get = get, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_get_inode], [ + #include + + int get(struct inode *ip, const char *name, + void *buffer, size_t size) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .get = get, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ + dnl # + dnl # 4.7 API change, + dnl # The xattr_handler->get() callback was changed to take both + dnl # dentry and inode. + dnl # + AC_MSG_CHECKING([whether xattr_handler->get() wants dentry and inode]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE, 1, [xattr_handler->get() wants both dentry and inode]) @@ -115,69 +134,40 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ dnl # should be accessed by handler->flags. 
dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->get() wants xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->get() wants xattr_handler]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_xattr_handler], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_HANDLER, 1, [xattr_handler->get() wants xattr_handler]) ],[ dnl # dnl # 2.6.33 API change, - dnl # The xattr_handler->get() callback was changed to take - dnl # a dentry instead of an inode, and a handler_flags - dnl # argument was added. + dnl # The xattr_handler->get() callback was changed + dnl # to take a dentry instead of an inode, and a + dnl # handler_flags argument was added. dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->get() wants dentry]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int handler_flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->get() wants dentry]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_DENTRY, 1, [xattr_handler->get() wants dentry]) ],[ dnl # - dnl # 2.6.32 API + dnl # Legacy 2.6.32 API dnl # AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->get() wants inode]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int get(struct inode *ip, const char *name, - void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT( + [xattr_handler_get_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_INODE, 1, [xattr_handler->get() wants 
inode]) ],[ - AC_MSG_ERROR([no; please file a bug report]) + ZFS_LINUX_TEST_ERROR([xattr get()]) ]) ]) ]) @@ -187,14 +177,8 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ dnl # dnl # Supported xattr handler set() interfaces checked newest to oldest. dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ - dnl # - dnl # 4.7 API change, - dnl # The xattr_handler->set() callback was changed to take both - dnl # dentry and inode. - dnl # - AC_MSG_CHECKING([whether xattr_handler->set() wants both dentry and inode]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ + ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [ #include int set(const struct xattr_handler *handler, @@ -206,8 +190,54 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ xops __attribute__ ((unused)) = { .set = set, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_set_xattr_handler], [ + #include + + int set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) + { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .set = set, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry], [ + #include + + int set(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, + int handler_flags) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .set = set, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_set_inode], [ + #include + + int set(struct inode *ip, const char *name, + const void *buffer, size_t size, int flags) + { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .set = set, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ + dnl # + dnl # 4.7 API change, + dnl # The xattr_handler->set() callback was changed to take both + dnl # dentry and inode. 
+ dnl # + AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode]) + ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1, [xattr_handler->set() wants both dentry and inode]) @@ -219,71 +249,40 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ dnl # should be accessed by handler->flags. dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->set() wants xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->set() wants xattr_handler]) + ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1, [xattr_handler->set() wants xattr_handler]) ],[ dnl # dnl # 2.6.33 API change, - dnl # The xattr_handler->set() callback was changed to take a - dnl # dentry instead of an inode, and a handler_flags - dnl # argument was added. + dnl # The xattr_handler->set() callback was changed + dnl # to take a dentry instead of an inode, and a + dnl # handler_flags argument was added. 
dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->set() wants dentry]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int set(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->set() wants dentry]) + ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1, [xattr_handler->set() wants dentry]) ],[ dnl # - dnl # 2.6.32 API + dnl # Legacy 2.6.32 API dnl # AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->set() wants inode]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int set(struct inode *ip, const char *name, - const void *buffer, size_t size, int flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT( + [xattr_handler_set_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_INODE, 1, [xattr_handler->set() wants inode]) ],[ - AC_MSG_ERROR([no; please file a bug report]) + ZFS_LINUX_TEST_ERROR([xattr set()]) ]) ]) ]) @@ -293,12 +292,8 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ dnl # dnl # Supported xattr handler list() interfaces checked newest to oldest. dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ - dnl # 4.5 API change, - dnl # The xattr_handler->list() callback was changed to take only a - dnl # dentry and it only needs to return if it's accessible. 
- AC_MSG_CHECKING([whether xattr_handler->list() wants simple]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ + ZFS_LINUX_TEST_SRC([xattr_handler_list_simple], [ #include bool list(struct dentry *dentry) { return 0; } @@ -306,8 +301,52 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ xops __attribute__ ((unused)) = { .list = list, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_list_xattr_handler], [ + #include + + size_t list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .list = list, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_list_dentry], [ + #include + + size_t list(struct dentry *dentry, + char *list, size_t list_size, + const char *name, size_t name_len, + int handler_flags) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .list = list, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_list_inode], [ + #include + + size_t list(struct inode *ip, char *lst, + size_t list_size, const char *name, + size_t name_len) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .list = list, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ + dnl # 4.5 API change, + dnl # The xattr_handler->list() callback was changed to take only a + dnl # dentry and it only needs to return if it's accessible. 
+ AC_MSG_CHECKING([whether xattr_handler->list() wants simple]) + ZFS_LINUX_TEST_RESULT([xattr_handler_list_simple], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_SIMPLE, 1, [xattr_handler->list() wants simple]) @@ -321,18 +360,7 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->list() wants xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ - #include - - size_t list(const struct xattr_handler *handler, - struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([xattr_handler_list_xattr_handler], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_HANDLER, 1, [xattr_handler->list() wants xattr_handler]) @@ -346,47 +374,24 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->list() wants dentry]) - ZFS_LINUX_TRY_COMPILE([ - #include - - size_t list(struct dentry *dentry, - char *list, size_t list_size, - const char *name, size_t name_len, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([xattr_handler_list_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_DENTRY, 1, [xattr_handler->list() wants dentry]) ],[ dnl # - dnl # 2.6.32 API + dnl # Legacy 2.6.32 API dnl # AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->list() wants inode]) - ZFS_LINUX_TRY_COMPILE([ - #include - - size_t list(struct inode *ip, char *lst, - size_t list_size, const char *name, - size_t name_len) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT( + [xattr_handler_list_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_INODE, 1, [xattr_handler->list() wants inode]) ],[ - AC_MSG_ERROR( - [no; please 
file a bug report]) + ZFS_LINUX_TEST_ERROR([xattr list()]) ]) ]) ]) @@ -398,15 +403,19 @@ dnl # 3.7 API change, dnl # The posix_acl_{from,to}_xattr functions gained a new dnl # parameter: user_ns dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ - AC_MSG_CHECKING([whether posix_acl_from_xattr() needs user_ns]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS], [ + ZFS_LINUX_TEST_SRC([posix_acl_from_xattr_userns], [ #include #include #include ],[ posix_acl_from_xattr(&init_user_ns, NULL, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ + AC_MSG_CHECKING([whether posix_acl_from_xattr() needs user_ns]) + ZFS_LINUX_TEST_RESULT([posix_acl_from_xattr_userns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_FROM_XATTR_USERNS, 1, [posix_acl_from_xattr() needs user_ns]) @@ -415,3 +424,50 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ ]) ]) +dnl # +dnl # 4.9 API change, +dnl # iops->{set,get,remove}xattr and generic_{set,get,remove}xattr are +dnl # removed. xattr operations will directly go through sb->s_xattr. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR], [ + ZFS_LINUX_TEST_SRC([have_generic_setxattr], [ + #include + #include + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .setxattr = generic_setxattr + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_SETXATTR], [ + AC_MSG_CHECKING([whether generic_setxattr() exists]) + ZFS_LINUX_TEST_RESULT([have_generic_setxattr], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_SETXATTR, 1, + [generic_setxattr() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR], [ + ZFS_AC_KERNEL_SRC_CONST_XATTR_HANDLER + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST + ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS + ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR], [ + ZFS_AC_KERNEL_CONST_XATTR_HANDLER + ZFS_AC_KERNEL_XATTR_HANDLER_NAME + ZFS_AC_KERNEL_XATTR_HANDLER_GET + ZFS_AC_KERNEL_XATTR_HANDLER_SET + ZFS_AC_KERNEL_XATTR_HANDLER_LIST + ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS + ZFS_AC_KERNEL_GENERIC_SETXATTR +]) diff --git a/config/kernel-zlib.m4 b/config/kernel-zlib.m4 index 3ca7cf682da..d554d1168e7 100644 --- a/config/kernel-zlib.m4 +++ b/config/kernel-zlib.m4 @@ -1,62 +1,25 @@ -dnl # -dnl # zlib inflate compat, -dnl # Verify the kernel has CONFIG_ZLIB_INFLATE support enabled. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE], [ - AC_MSG_CHECKING([whether CONFIG_ZLIB_INFLATE is defined]) - ZFS_LINUX_TRY_COMPILE([ - #if !defined(CONFIG_ZLIB_INFLATE) && \ - !defined(CONFIG_ZLIB_INFLATE_MODULE) - #error CONFIG_ZLIB_INFLATE not defined - #endif - ],[ ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([ - *** This kernel does not include the required zlib inflate support. 
- *** Rebuild the kernel with CONFIG_ZLIB_INFLATE=y|m set.]) - ]) -]) - -dnl # -dnl # zlib deflate compat, -dnl # Verify the kernel has CONFIG_ZLIB_DEFLATE support enabled. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE], [ - AC_MSG_CHECKING([whether CONFIG_ZLIB_DEFLATE is defined]) - ZFS_LINUX_TRY_COMPILE([ - #if !defined(CONFIG_ZLIB_DEFLATE) && \ - !defined(CONFIG_ZLIB_DEFLATE_MODULE) - #error CONFIG_ZLIB_DEFLATE not defined - #endif - ],[ ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([ - *** This kernel does not include the required zlib deflate support. - *** Rebuild the kernel with CONFIG_ZLIB_DEFLATE=y|m set.]) - ]) -]) - dnl # dnl # 2.6.39 API compat, +dnl dnl # The function zlib_deflate_workspacesize() now take 2 arguments. dnl # This was done to avoid always having to allocate the maximum size dnl # workspace (268K). The caller can now specific the windowBits and dnl # memLevel compression parameters to get a smaller workspace. dnl # -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], - [AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [ + ZFS_LINUX_TEST_SRC([2args_zlib_deflate_workspacesize], [ #include ],[ return zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [ + AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args]) + ZFS_LINUX_TEST_RESULT([2args_zlib_deflate_workspacesize], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1, - [zlib_deflate_workspacesize() wants 2 args]) + [zlib_deflate_workspacesize() wants 2 args]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 8e89c8014d8..b22a00cdd13 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -2,111 +2,217 @@ dnl # dnl # Default ZFS kernel configuration dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ + dnl # Setup 
the kernel build environment. ZFS_AC_KERNEL ZFS_AC_QAT - ZFS_AC_KERNEL_ACCESS_OK_TYPE - ZFS_AC_TEST_MODULE + + dnl # Sanity checks for module building and CONFIG_* defines + ZFS_AC_KERNEL_TEST_MODULE + ZFS_AC_KERNEL_CONFIG_DEFINED + + dnl # Sequential ZFS_LINUX_TRY_COMPILE tests + ZFS_AC_KERNEL_FPU_HEADER + ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T ZFS_AC_KERNEL_MISC_MINOR + ZFS_AC_KERNEL_DECLARE_EVENT_CLASS + + dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests + ZFS_AC_KERNEL_TEST_SRC + ZFS_AC_KERNEL_TEST_RESULT + + AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ + KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" + ]) + + AC_SUBST(KERNEL_MAKE) +]) + +dnl # +dnl # Generate and compile all of the kernel API test cases to determine +dnl # which interfaces are available. By invoking the kernel build system +dnl # only once the compilation can be done in parallel significantly +dnl # speeding up the process. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ + ZFS_AC_KERNEL_SRC_OBJTOOL + ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE + ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE + ZFS_AC_KERNEL_SRC_CTL_NAME + ZFS_AC_KERNEL_SRC_PDE_DATA + ZFS_AC_KERNEL_SRC_FALLOCATE + ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE + ZFS_AC_KERNEL_SRC_RWSEM + ZFS_AC_KERNEL_SRC_SCHED + ZFS_AC_KERNEL_SRC_USLEEP_RANGE + ZFS_AC_KERNEL_SRC_KMEM_CACHE + ZFS_AC_KERNEL_SRC_WAIT + ZFS_AC_KERNEL_SRC_INODE_TIMES + ZFS_AC_KERNEL_SRC_INODE_LOCK + ZFS_AC_KERNEL_SRC_GROUP_INFO_GID + ZFS_AC_KERNEL_SRC_RW + ZFS_AC_KERNEL_SRC_TIMER_SETUP + ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL + ZFS_AC_KERNEL_SRC_SUPER_USER_NS + ZFS_AC_KERNEL_SRC_SUBMIT_BIO + ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS + ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH + ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART + ZFS_AC_KERNEL_SRC_INVALIDATE_BDEV + ZFS_AC_KERNEL_SRC_LOOKUP_BDEV + ZFS_AC_KERNEL_SRC_BDEV_OPEN_EXCLUSIVE + ZFS_AC_KERNEL_SRC_BDEV_LOGICAL_BLOCK_SIZE + ZFS_AC_KERNEL_SRC_BDEV_PHYSICAL_BLOCK_SIZE + ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER + ZFS_AC_KERNEL_SRC_BIO_FAILFAST + 
ZFS_AC_KERNEL_SRC_BIO_SET_DEV + ZFS_AC_KERNEL_SRC_BIO_OPS + ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS + ZFS_AC_KERNEL_SRC_BIO_BI_STATUS + ZFS_AC_KERNEL_SRC_BIO_RW_BARRIER + ZFS_AC_KERNEL_SRC_BIO_RW_DISCARD + ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI + ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD + ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAGS + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH + ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS + ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG + ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE + ZFS_AC_KERNEL_SRC_GET_DISK_RO + ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL + ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY + ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE + ZFS_AC_KERNEL_SRC_XATTR + ZFS_AC_KERNEL_SRC_ACL + ZFS_AC_KERNEL_SRC_INODE_GETATTR + ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS + ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION + ZFS_AC_KERNEL_SRC_SHOW_OPTIONS + ZFS_AC_KERNEL_SRC_FILE_INODE + ZFS_AC_KERNEL_SRC_FILE_DENTRY + ZFS_AC_KERNEL_SRC_FSYNC + ZFS_AC_KERNEL_SRC_AIO_FSYNC + ZFS_AC_KERNEL_SRC_EVICT_INODE + ZFS_AC_KERNEL_SRC_DIRTY_INODE + ZFS_AC_KERNEL_SRC_SHRINKER + ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T + ZFS_AC_KERNEL_SRC_LOOKUP_NAMEIDATA + ZFS_AC_KERNEL_SRC_CREATE_NAMEIDATA + ZFS_AC_KERNEL_SRC_GET_LINK + ZFS_AC_KERNEL_SRC_PUT_LINK + ZFS_AC_KERNEL_SRC_TMPFILE + ZFS_AC_KERNEL_SRC_TRUNCATE_RANGE + ZFS_AC_KERNEL_SRC_AUTOMOUNT + ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE + ZFS_AC_KERNEL_SRC_COMMIT_METADATA + ZFS_AC_KERNEL_SRC_CLEAR_INODE + ZFS_AC_KERNEL_SRC_SETATTR_PREPARE + ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED + ZFS_AC_KERNEL_SRC_DENTRY + ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE + ZFS_AC_KERNEL_SRC_SECURITY_INODE + ZFS_AC_KERNEL_SRC_FST_MOUNT + ZFS_AC_KERNEL_SRC_BDI + ZFS_AC_KERNEL_SRC_SET_NLINK + ZFS_AC_KERNEL_SRC_ELEVATOR_CHANGE + ZFS_AC_KERNEL_SRC_SGET + ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE + ZFS_AC_KERNEL_SRC_VFS_GETATTR + ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS + ZFS_AC_KERNEL_SRC_VFS_ITERATE + ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO + 
ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE + ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS + ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS + ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE + ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN + ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT + ZFS_AC_KERNEL_SRC_FPU + ZFS_AC_KERNEL_SRC_FMODE_T + ZFS_AC_KERNEL_SRC_KUIDGID_T + ZFS_AC_KERNEL_SRC_KUID_HELPERS + ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST + ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS + ZFS_AC_KERNEL_SRC_CURRENT_TIME + ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES + ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL + ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64 + ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC + ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES + ZFS_AC_KERNEL_SRC_KSTRTOUL + + AC_MSG_CHECKING([for available kernel interfaces]) + ZFS_LINUX_TEST_COMPILE_ALL([kabi]) + AC_MSG_RESULT([done]) +]) + +dnl # +dnl # Check results of kernel interface tests. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ + ZFS_AC_KERNEL_ACCESS_OK_TYPE + ZFS_AC_KERNEL_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_OBJTOOL - ZFS_AC_KERNEL_CONFIG ZFS_AC_KERNEL_CTL_NAME ZFS_AC_KERNEL_PDE_DATA - ZFS_AC_KERNEL_2ARGS_VFS_FSYNC - ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE - ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW - ZFS_AC_KERNEL_RWSEM_ACTIVITY - ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT - ZFS_AC_KERNEL_SCHED_RT_HEADER - ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER - ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT - ZFS_AC_KERNEL_4ARGS_VFS_GETATTR - ZFS_AC_KERNEL_3ARGS_VFS_GETATTR - ZFS_AC_KERNEL_2ARGS_VFS_GETATTR + ZFS_AC_KERNEL_RWSEM + ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE - ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS - ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY - ZFS_AC_KERNEL_WAIT_ON_BIT - ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T - ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY + ZFS_AC_KERNEL_KMEM_CACHE + ZFS_AC_KERNEL_WAIT ZFS_AC_KERNEL_INODE_TIMES ZFS_AC_KERNEL_INODE_LOCK ZFS_AC_KERNEL_GROUP_INFO_GID - ZFS_AC_KERNEL_WRITE - ZFS_AC_KERNEL_READ + ZFS_AC_KERNEL_RW ZFS_AC_KERNEL_TIMER_SETUP - 
ZFS_AC_KERNEL_DECLARE_EVENT_CLASS ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS ZFS_AC_KERNEL_SUBMIT_BIO - ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS - ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID - ZFS_AC_KERNEL_TYPE_FMODE_T + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_BLKDEV_REREAD_PART - ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE + ZFS_AC_KERNEL_INVALIDATE_BDEV ZFS_AC_KERNEL_LOOKUP_BDEV - ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS + ZFS_AC_KERNEL_BDEV_OPEN_EXCLUSIVE ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE ZFS_AC_KERNEL_BIO_BVEC_ITER - ZFS_AC_KERNEL_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_BIO_FAILFAST ZFS_AC_KERNEL_BIO_SET_DEV - ZFS_AC_KERNEL_REQ_FAILFAST_MASK - ZFS_AC_KERNEL_REQ_OP_DISCARD - ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE - ZFS_AC_KERNEL_REQ_OP_FLUSH - ZFS_AC_KERNEL_BIO_BI_OPF + ZFS_AC_KERNEL_BIO_OPS ZFS_AC_KERNEL_BIO_END_IO_T_ARGS ZFS_AC_KERNEL_BIO_BI_STATUS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_BDI - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET + ZFS_AC_KERNEL_BLK_QUEUE_DISCARD + ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE + ZFS_AC_KERNEL_BLK_QUEUE_FLAGS ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG - ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG + ZFS_AC_KERNEL_BLK_QUEUE_PLUG ZFS_AC_KERNEL_GET_DISK_AND_MODULE ZFS_AC_KERNEL_GET_DISK_RO - ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_DISCARD_GRANULARITY - ZFS_AC_KERNEL_CONST_XATTR_HANDLER - ZFS_AC_KERNEL_XATTR_HANDLER_NAME - ZFS_AC_KERNEL_XATTR_HANDLER_GET - ZFS_AC_KERNEL_XATTR_HANDLER_SET - ZFS_AC_KERNEL_XATTR_HANDLER_LIST ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE - ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS - ZFS_AC_KERNEL_POSIX_ACL_RELEASE - ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE - ZFS_AC_KERNEL_POSIX_ACL_CHMOD - 
ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T - ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS - ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION - ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA - ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL - ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS - ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL - ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL - ZFS_AC_KERNEL_INODE_OPERATIONS_GETATTR + ZFS_AC_KERNEL_XATTR + ZFS_AC_KERNEL_ACL + ZFS_AC_KERNEL_INODE_GETATTR ZFS_AC_KERNEL_INODE_SET_FLAGS ZFS_AC_KERNEL_INODE_SET_IVERSION - ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE ZFS_AC_KERNEL_SHOW_OPTIONS ZFS_AC_KERNEL_FILE_INODE ZFS_AC_KERNEL_FILE_DENTRY ZFS_AC_KERNEL_FSYNC - ZFS_AC_KERNEL_EVICT_INODE - ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS - ZFS_AC_KERNEL_NR_CACHED_OBJECTS - ZFS_AC_KERNEL_FREE_CACHED_OBJECTS - ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_AIO_FSYNC + ZFS_AC_KERNEL_EVICT_INODE + ZFS_AC_KERNEL_DIRTY_INODE + ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR_UMODE_T ZFS_AC_KERNEL_LOOKUP_NAMEIDATA ZFS_AC_KERNEL_CREATE_NAMEIDATA @@ -120,58 +226,38 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CLEAR_INODE ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED - ZFS_AC_KERNEL_D_MAKE_ROOT - ZFS_AC_KERNEL_D_OBTAIN_ALIAS - ZFS_AC_KERNEL_D_PRUNE_ALIASES - ZFS_AC_KERNEL_D_SET_D_OP - ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA - ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS + ZFS_AC_KERNEL_DENTRY ZFS_AC_KERNEL_TRUNCATE_SETSIZE - ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY - ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY + ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT - ZFS_AC_KERNEL_SHRINK - ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID - ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT - ZFS_AC_KERNEL_SHRINKER_CALLBACK - ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD - ZFS_AC_KERNEL_S_D_OP ZFS_AC_KERNEL_BDI ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_ELEVATOR_CHANGE - ZFS_AC_KERNEL_5ARG_SGET + ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_LSEEK_EXECUTE + ZFS_AC_KERNEL_VFS_GETATTR + ZFS_AC_KERNEL_VFS_FSYNC_2ARGS 
ZFS_AC_KERNEL_VFS_ITERATE - ZFS_AC_KERNEL_VFS_RW_ITERATE ZFS_AC_KERNEL_VFS_DIRECT_IO - ZFS_AC_KERNEL_GENERIC_WRITE_CHECKS + ZFS_AC_KERNEL_VFS_RW_ITERATE + ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN - ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG - ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG + ZFS_AC_KERNEL_GENERIC_IO_ACCT ZFS_AC_KERNEL_FPU + ZFS_AC_KERNEL_FMODE_T + ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST ZFS_AC_KERNEL_RENAME_WANTS_FLAGS - ZFS_AC_KERNEL_HAVE_GENERIC_SETXATTR ZFS_AC_KERNEL_CURRENT_TIME - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE - ZFS_AC_KERNEL_ACL_HAS_REFCOUNT ZFS_AC_KERNEL_USERNS_CAPABILITIES ZFS_AC_KERNEL_IN_COMPAT_SYSCALL ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64 ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES - ZFS_AC_KERNEL_BLK_QUEUE_DISCARD - ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE ZFS_AC_KERNEL_KSTRTOUL - - AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ - KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" - ]) - - AC_SUBST(KERNEL_MAKE) ]) dnl # @@ -190,9 +276,10 @@ AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [ AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution - *** is installed. If you are building with a custom kernel, make sure the - *** kernel is configured, built, and the '--with-linux=PATH' configure - *** option refers to the location of the kernel source.]) + *** is installed. If you are building with a custom kernel, make sure + *** the kernel is configured, built, and the '--with-linux=PATH' + *** configure option refers to the location of the kernel source. + ]) ]) ], [ LINUX_SYMBOLS=NONE @@ -285,12 +372,16 @@ AC_DEFUN([ZFS_AC_KERNEL], [ AS_IF([test -z "$kernsrcver"], [ AC_MSG_RESULT([Not found]) - AC_MSG_ERROR([*** Cannot determine kernel version.]) + AC_MSG_ERROR([ + *** Cannot determine kernel version. 
+ ]) ]) ], [ AC_MSG_RESULT([Not found]) if test "x$enable_linux_builtin" != xyes; then - AC_MSG_ERROR([*** Cannot find UTS_RELEASE definition.]) + AC_MSG_ERROR([ + *** Cannot find UTS_RELEASE definition. + ]) else AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. @@ -312,24 +403,27 @@ AC_DEFUN([ZFS_AC_KERNEL], [ ]) dnl # -dnl # Detect the QAT module to be built against -dnl # QAT provides hardware acceleration for data compression: -dnl # https://01.org/intel-quickassist-technology -dnl # * Download and install QAT driver from the above link -dnl # * Start QAT driver in your system: -dnl # service qat_service start -dnl # * Enable QAT in ZFS, e.g.: -dnl # ./configure --with-qat=/QAT1.6 -dnl # make -dnl # * Set GZIP compression in ZFS dataset: -dnl # zfs set compression = gzip -dnl # Then the data written to this ZFS pool is compressed -dnl # by QAT accelerator automatically, and de-compressed by -dnl # QAT when read from the pool. -dnl # * Get QAT hardware statistics by: -dnl # cat /proc/icp_dh895xcc_dev/qat -dnl # * To disable QAT: -dnl # insmod zfs.ko zfs_qat_disable=1 +dnl # Detect the QAT module to be built against, QAT provides hardware +dnl # acceleration for data compression: +dnl # +dnl # https://01.org/intel-quickassist-technology +dnl # +dnl # 1) Download and install QAT driver from the above link +dnl # 2) Start QAT driver in your system: +dnl # service qat_service start +dnl # 3) Enable QAT in ZFS, e.g.: +dnl # ./configure --with-qat=/QAT1.6 +dnl # make +dnl # 4) Set GZIP compression in ZFS dataset: +dnl # zfs set compression = gzip +dnl # +dnl # Then the data written to this ZFS pool is compressed by QAT accelerator +dnl # automatically, and de-compressed by QAT when read from the pool. 
+dnl # +dnl # 1) Get QAT hardware statistics with: +dnl # cat /proc/icp_dh895xcc_dev/qat +dnl # 2) To disable QAT: +dnl # insmod zfs.ko zfs_qat_disable=1 dnl # AC_DEFUN([ZFS_AC_QAT], [ AC_ARG_WITH([qat], @@ -350,11 +444,11 @@ AC_DEFUN([ZFS_AC_QAT], [ QAT_SRC="${qatsrc}/quickassist" AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [ AC_MSG_ERROR([ - *** Please make sure the qat driver package is installed - *** and specify the location of the qat source with the - *** '--with-qat=PATH' option then try again. Failed to - *** find cpa.h in: - ${QAT_SRC}/include]) + *** Please make sure the qat driver package is installed + *** and specify the location of the qat source with the + *** '--with-qat=PATH' option then try again. Failed to + *** find cpa.h in: + ${QAT_SRC}/include]) ]) ]) @@ -368,9 +462,9 @@ AC_DEFUN([ZFS_AC_QAT], [ QAT_OBJ=${qatbuild} AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko" && ! test -e "$QAT_OBJ/qat_api.ko"], [ AC_MSG_ERROR([ - *** Please make sure the qat driver is installed then try again. - *** Failed to find icp_qa_al.ko or qat_api.ko in: - $QAT_OBJ]) + *** Please make sure the qat driver is installed then try again. + *** Failed to find icp_qa_al.ko or qat_api.ko in: + $QAT_OBJ]) ]) AC_SUBST(QAT_SRC) @@ -391,10 +485,10 @@ AC_DEFUN([ZFS_AC_QAT], [ AC_MSG_RESULT([$QAT_SYMBOLS]) AC_SUBST(QAT_SYMBOLS) ],[ - AC_MSG_ERROR([ - *** Please make sure the qat driver is installed then try again. - *** Failed to find Module.symvers in: - $QAT_SYMBOLS]) + AC_MSG_ERROR([ + *** Please make sure the qat driver is installed then try again. + *** Failed to find Module.symvers in: + $QAT_SYMBOLS ]) ]) ]) @@ -403,14 +497,16 @@ AC_DEFUN([ZFS_AC_QAT], [ dnl # dnl # Basic toolchain sanity check. 
dnl # -AC_DEFUN([ZFS_AC_TEST_MODULE], [ +AC_DEFUN([ZFS_AC_KERNEL_TEST_MODULE], [ AC_MSG_CHECKING([whether modules can be built]) - ZFS_LINUX_TRY_COMPILE([],[],[ + ZFS_LINUX_TRY_COMPILE([], [], [ AC_MSG_RESULT([yes]) ],[ AC_MSG_RESULT([no]) if test "x$enable_linux_builtin" != xyes; then - AC_MSG_ERROR([*** Unable to build an empty module.]) + AC_MSG_ERROR([ + *** Unable to build an empty module. + ]) else AC_MSG_ERROR([ *** Unable to build an empty module. @@ -420,207 +516,313 @@ AC_DEFUN([ZFS_AC_TEST_MODULE], [ ]) dnl # -dnl # Certain kernel build options are not supported. These must be -dnl # detected at configure time and cause a build failure. Otherwise -dnl # modules may be successfully built that behave incorrectly. +dnl # ZFS_LINUX_CONFTEST_H dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [ - AS_IF([test "x$cross_compiling" != xyes], [ - AC_RUN_IFELSE([ - AC_LANG_PROGRAM([ - #include "$LINUX/include/linux/license.h" - ], [ - return !license_is_gpl_compatible("$ZFS_META_LICENSE"); - ]) - ], [ - AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1], - [Define to 1 if GPL-only symbols can be used]) - ], [ - ]) - ]) +AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ +test -d build/$2 || mkdir -p build/$2 +cat - <<_ACEOF >build/$2/$2.h +$1 +_ACEOF +]) - ZFS_AC_KERNEL_CONFIG_THREAD_SIZE - ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC - ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS - ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE - ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE +dnl # +dnl # ZFS_LINUX_CONFTEST_C +dnl # +AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ +test -d build/$2 || mkdir -p build/$2 +cat confdefs.h - <<_ACEOF >build/$2/$2.c +$1 +_ACEOF ]) dnl # -dnl # Check configured THREAD_SIZE +dnl # ZFS_LINUX_CONFTEST_MAKEFILE dnl # -dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64 -dnl # the default thread stack size was increased to 16K from 8K. 
Therefore, -dnl # on newer kernels and some architectures stack usage optimizations can be -dnl # conditionally applied to improve performance without negatively impacting -dnl # stability. +dnl # $1 - test case name +dnl # $2 - add to top-level Makefile +dnl # $3 - additional build flags dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [ - AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - #if (THREAD_SIZE < 16384) - #error "THREAD_SIZE is less than 16K" - #endif - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks]) - ],[ - AC_MSG_RESULT([no]) - ]) +AC_DEFUN([ZFS_LINUX_CONFTEST_MAKEFILE], [ + test -d build || mkdir -p build + test -d build/$1 || mkdir -p build/$1 + + file=build/$1/Makefile + + dnl # Example command line to manually build source. + cat - <<_ACEOF >$file +# Example command line to manually build source +# make modules -C $LINUX_OBJ $ARCH_UM M=$PWD/build/$1 + +ccflags-y := -Werror $FRAME_LARGER_THAN +_ACEOF + + dnl # Additional custom CFLAGS as requested. + m4_ifval($3, [echo "ccflags-y += $3" >>$file], []) + + dnl # Test case source + echo "obj-m := $1.o" >>$file + + AS_IF([test "x$2" = "xyes"], [echo "obj-m += $1/" >>build/Makefile], []) ]) dnl # -dnl # Check CONFIG_DEBUG_LOCK_ALLOC +dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY]) dnl # -dnl # This is typically only set for debug kernels because it comes with -dnl # a performance penalty. However, when it is set it maps the non-GPL -dnl # symbol mutex_lock() to the GPL-only mutex_lock_nested() symbol. -dnl # This will cause a failure at link time which we'd rather know about -dnl # at compile time. +m4_define([ZFS_LINUX_TEST_PROGRAM], [ +$1 +int +main (void) +{ +$2 + ; + return 0; +} +]) + dnl # -dnl # Since we plan to pursue making mutex_lock_nested() a non-GPL symbol -dnl # with the upstream community we add a check to detect this case. 
+dnl # ZFS_LINUX_TEST_REMOVE dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [ - - ZFS_LINUX_CONFIG([DEBUG_LOCK_ALLOC], [ - AC_MSG_CHECKING([whether mutex_lock() is GPL-only]) - tmp_flags="$EXTRA_KCFLAGS" - ZFS_LINUX_TRY_COMPILE([ - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct mutex lock; +dnl # Removes the specified test source and results. +dnl # +AC_DEFUN([ZFS_LINUX_TEST_REMOVE], [ + test -d build/$1 && rm -Rf build/$1 + test -f build/Makefile && sed '/$1/d' build/Makefile +]) - mutex_init(&lock); - mutex_lock(&lock); - mutex_unlock(&lock); - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_MSG_ERROR([ - *** Kernel built with CONFIG_DEBUG_LOCK_ALLOC which is incompatible - *** with the CDDL license and will prevent the module linking stage - *** from succeeding. You must rebuild your kernel without this - *** option enabled.]) - ]) - EXTRA_KCFLAGS="$tmp_flags" - ], []) +dnl # +dnl # ZFS_LINUX_COMPILE +dnl # +dnl # $1 - build dir +dnl # $2 - test command +dnl # $3 - pass command +dnl # $4 - fail command +dnl # $5 - set KBUILD_MODPOST_NOFINAL='yes' +dnl # $6 - set KBUILD_MODPOST_WARN='yes' +dnl # +dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST} +dnl # +AC_DEFUN([ZFS_LINUX_COMPILE], [ + AC_TRY_COMMAND([ + KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6" + make modules -k -j$TEST_JOBS -C $LINUX_OBJ $ARCH_UM + M=$PWD/$1 &>$1/build.log]) + AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4]) ]) dnl # -dnl # Check CONFIG_TRIM_UNUSED_KSYMS +dnl # ZFS_LINUX_TEST_COMPILE dnl # -dnl # Verify the kernel has CONFIG_TRIM_UNUSED_KSYMS disabled. +dnl # Perform a full compile excluding the final modpost phase. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS], [ - AC_MSG_CHECKING([whether CONFIG_TRIM_UNUSED_KSYM is disabled]) - ZFS_LINUX_TRY_COMPILE([ - #if defined(CONFIG_TRIM_UNUSED_KSYMS) - #error CONFIG_TRIM_UNUSED_KSYMS not defined - #endif - ],[ ],[ - AC_MSG_RESULT([yes]) +AC_DEFUN([ZFS_LINUX_TEST_COMPILE], [ + ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ + mv $2/Makefile $2/Makefile.compile.$1 + mv $2/build.log $2/build.log.$1 ],[ - AC_MSG_RESULT([no]) - AS_IF([test "x$enable_linux_builtin" != xyes], [ - AC_MSG_ERROR([ - *** This kernel has unused symbols trimming enabled, please disable. - *** Rebuild the kernel with CONFIG_TRIM_UNUSED_KSYMS=n set.]) - ])]) + AC_MSG_ERROR([ + *** Unable to compile test source to determine kernel interfaces.]) + ], [yes], []) ]) dnl # -dnl # ZFS_LINUX_CONFTEST_H +dnl # ZFS_LINUX_TEST_MODPOST dnl # -AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ -cat - <<_ACEOF >conftest.h -$1 -_ACEOF +dnl # Perform a full compile including the modpost phase. This may +dnl # be an incremental build if the objects have already been built. +dnl # +AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [ + ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ + mv $2/Makefile $2/Makefile.modpost.$1 + cat $2/build.log >>build/build.log.$1 + ],[ + AC_MSG_ERROR([ + *** Unable to modpost test source to determine kernel interfaces.]) + ], [], [yes]) ]) dnl # -dnl # ZFS_LINUX_CONFTEST_C +dnl # Perform the compilation of the test cases in two phases. dnl # -AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ -cat confdefs.h - <<_ACEOF >conftest.c -$1 -_ACEOF +dnl # Phase 1) attempt to build the object files for all of the tests +dnl # defined by the ZFS_LINUX_TEST_SRC macro. But do not +dnl # perform the final modpost stage. +dnl # +dnl # Phase 2) disable all tests which failed the initial compilation, +dnl # then invoke the final modpost step for the remaining tests. 
+dnl # +dnl # This allows us efficiently build the test cases in parallel while +dnl # remaining resilient to build failures which are expected when +dnl # detecting the available kernel interfaces. +dnl # +dnl # The maximum allowed parallelism can be controlled by setting the +dnl # TEST_JOBS environment variable. Otherwise, it default to $(nproc). +dnl # +AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [ + dnl # Phase 1 - Compilation only, final linking is skipped. + ZFS_LINUX_TEST_COMPILE([$1], [build]) + + dnl # + dnl # Phase 2 - When building external modules disable test cases + dnl # which failed to compile and invoke modpost to verify the + dnl # final linking. + dnl # + dnl # Test names suffixed with '_license' call modpost independently + dnl # to ensure that a single incompatibility does not result in the + dnl # modpost phase exiting early. This check is not performed on + dnl # every symbol since the majority are compatible and doing so + dnl # would significantly slow down this phase. + dnl # + dnl # When configuring for builtin (--enable-linux-builtin) + dnl # fake the linking step artificially create the expected .ko + dnl # files for tests which did compile. This is required for + dnl # kernels which do not have loadable module support or have + dnl # not yet been built. 
+ dnl # + AS_IF([test "x$enable_linux_builtin" = "xno"], [ + for dir in $(awk '/^obj-m/ { print [$]3 }' \ + build/Makefile.compile.$1); do + name=${dir%/} + AS_IF([test -f build/$name/$name.o], [ + AS_IF([test "${name##*_}" = "license"], [ + ZFS_LINUX_TEST_MODPOST([$1], + [build/$name]) + echo "obj-n += $dir" >>build/Makefile + ], [ + echo "obj-m += $dir" >>build/Makefile + ]) + ], [ + echo "obj-n += $dir" >>build/Makefile + ]) + done + + ZFS_LINUX_TEST_MODPOST([$1], [build]) + ], [ + for dir in $(awk '/^obj-m/ { print [$]3 }' \ + build/Makefile.compile.$1); do + name=${dir%/} + AS_IF([test -f build/$name/$name.o], [ + touch build/$name/$name.ko + ]) + done + ]) ]) dnl # -dnl # ZFS_LANG_PROGRAM(C)([PROLOGUE], [BODY]) +dnl # ZFS_LINUX_TEST_SRC dnl # -m4_define([ZFS_LANG_PROGRAM], [ -$1 -int -main (void) -{ -dnl Do *not* indent the following line: there may be CPP directives. -dnl Don't move the `;' right after for the same reason. -$2 - ; - return 0; -} +dnl # $1 - name +dnl # $2 - global +dnl # $3 - source +dnl # $4 - extra cflags +dnl # $5 - check license-compatibility +dnl # +dnl # N.B because all of the test cases are compiled in parallel they +dnl # must never depend on the results of previous tests. Each test +dnl # needs to be entirely independent. 
+dnl # +AC_DEFUN([ZFS_LINUX_TEST_SRC], [ + ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]])], [$1]) + ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4]) + + AS_IF([ test -n "$5" ], [ + ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[ + #include + MODULE_LICENSE("$5"); + $2]], [[$3]])], [$1_license]) + ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4]) + ]) ]) dnl # -dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE +dnl # ZFS_LINUX_TEST_RESULT dnl # -AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ - m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1])]) - m4_ifvaln([$6], [ZFS_LINUX_CONFTEST_H([$6])], [ZFS_LINUX_CONFTEST_H([])]) - rm -Rf build && mkdir -p build && touch build/conftest.mod.c - echo "obj-m := conftest.o" >build/Makefile - modpost_flag='' - test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage - AS_IF( - [AC_TRY_COMMAND(cp conftest.c conftest.h build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $FRAME_LARGER_THAN $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])], - [$4], - [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])] - ) - rm -Rf build +dnl # $1 - name of a test source (ZFS_LINUX_TEST_SRC) +dnl # $2 - run on success (valid .ko generated) +dnl # $3 - run on failure (unable to compile) +dnl # +AC_DEFUN([ZFS_LINUX_TEST_RESULT], [ + AS_IF([test -d build/$1], [ + AS_IF([test -f build/$1/$1.ko], [$2], [$3]) + ], [ + AC_MSG_ERROR([ + *** No matching source for the "$1" test, check that + *** both the test source and result macros refer to the same name. + ]) + ]) ]) dnl # -dnl # ZFS_LINUX_TRY_COMPILE like AC_TRY_COMPILE +dnl # ZFS_LINUX_TEST_ERROR dnl # -AC_DEFUN([ZFS_LINUX_TRY_COMPILE], - [ZFS_LINUX_COMPILE_IFELSE( - [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])], - [modules], - [test -s build/conftest.o], - [$3], [$4]) +dnl # Generic error message which can be used when none of the expected +dnl # kernel interfaces were detected. 
+dnl # +AC_DEFUN([ZFS_LINUX_TEST_ERROR], [ + AC_MSG_ERROR([ + *** None of the expected "$1" interfaces were detected. + *** This may be because your kernel version is newer than what is + *** supported, or you are using a patched custom kernel with + *** incompatible modifications. + *** + *** ZFS Version: $ZFS_META_ALIAS + *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX + ]) ]) dnl # -dnl # ZFS_LINUX_CONFIG +dnl # ZFS_LINUX_TEST_RESULT_SYMBOL dnl # -AC_DEFUN([ZFS_LINUX_CONFIG], - [AC_MSG_CHECKING([whether kernel was built with CONFIG_$1]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - #ifndef CONFIG_$1 - #error CONFIG_$1 not #defined - #endif - ],[ - AC_MSG_RESULT([yes]) - $2 - ],[ - AC_MSG_RESULT([no]) - $3 +dnl # Like ZFS_LINUX_TEST_RESULT except ZFS_CHECK_SYMBOL_EXPORT is called to +dnl # verify symbol exports, unless --enable-linux-builtin was provided to +dnl # configure. +dnl # +AC_DEFUN([ZFS_LINUX_TEST_RESULT_SYMBOL], [ + AS_IF([ ! test -f build/$1/$1.ko], [ + $5 + ], [ + AS_IF([test "x$enable_linux_builtin" != "xyes"], [ + ZFS_CHECK_SYMBOL_EXPORT([$2], [$3], [$4], [$5]) + ], [ + $4 + ]) ]) ]) +dnl # +dnl # ZFS_LINUX_COMPILE_IFELSE +dnl # +AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ + ZFS_LINUX_TEST_REMOVE([conftest]) + + m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1], [conftest])]) + m4_ifvaln([$5], [ZFS_LINUX_CONFTEST_H([$5], [conftest])], + [ZFS_LINUX_CONFTEST_H([], [conftest])]) + + ZFS_LINUX_CONFTEST_MAKEFILE([conftest], [no], + [m4_ifvaln([$5], [-I$PWD/build/conftest], [])]) + ZFS_LINUX_COMPILE([build/conftest], [$2], [$3], [$4], [], []) +]) + +dnl # +dnl # ZFS_LINUX_TRY_COMPILE +dnl # +dnl # $1 - global +dnl # $2 - source +dnl # $3 - run on success (valid .ko generated) +dnl # $4 - run on failure (unable to compile) +dnl # +AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ + ZFS_LINUX_COMPILE_IFELSE( + [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], + [test -f build/conftest/conftest.ko], + [$3], [$4]) +]) + dnl # dnl # ZFS_CHECK_SYMBOL_EXPORT -dnl # check symbol 
exported or not +dnl # +dnl # Check if a symbol is exported on not by consulting the symbols +dnl # file, or optionally the source code. dnl # AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ @@ -649,8 +851,10 @@ AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ dnl # dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL -dnl # like ZFS_LINUX_TRY_COMPILE, except ZFS_CHECK_SYMBOL_EXPORT -dnl # is called if not compiling for builtin +dnl # +dnl # Like ZFS_LINUX_TRY_COMPILER except ZFS_CHECK_SYMBOL_EXPORT is called +dnl # to verify symbol exports, unless --enable-linux-builtin was provided +dnl # to configure. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [ ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) @@ -673,10 +877,9 @@ dnl # ZFS_LINUX_TRY_COMPILE_HEADER dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are dnl # provided via the fifth parameter dnl # -AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], - [ZFS_LINUX_COMPILE_IFELSE( - [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])], - [modules], - [test -s build/conftest.o], - [$3], [$4], [$5]) +AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ + ZFS_LINUX_COMPILE_IFELSE( + [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], + [test -f build/conftest/conftest.ko], + [$3], [$4], [$5]) ]) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index c2e5bb25fe2..92aa6030dd1 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -166,6 +166,17 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ]) AC_DEFUN([ZFS_AC_CONFIG], [ + + dnl # Remove the previous build test directory. 
+ rm -Rf build + + AC_ARG_VAR([TEST_JOBS], + [simultaneous jobs during configure (defaults to $(nproc))]) + if test "x$ac_cv_env_TEST_JOBS_set" != "xset"; then + TEST_JOBS=$(nproc) + fi + AC_SUBST(TEST_JOBS) + ZFS_CONFIG=all AC_ARG_WITH([config], AS_HELP_STRING([--with-config=CONFIG], diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4 index aa0fc142093..b3c1befaac5 100644 --- a/config/zfs-meta.m4 +++ b/config/zfs-meta.m4 @@ -138,6 +138,24 @@ AC_DEFUN([ZFS_AC_META], [ AC_SUBST([ZFS_META_AUTHOR]) fi + ZFS_META_KVER_MIN=_ZFS_AC_META_GETVAL([Linux-Minimum]); + if test -n "$ZFS_META_KVER_MIN"; then + AC_DEFINE_UNQUOTED([ZFS_META_KVER_MIN], + ["$ZFS_META_KVER_MIN"], + [Define the minimum compatible kernel version.] + ) + AC_SUBST([ZFS_META_KVER_MIN]) + fi + + ZFS_META_KVER_MAX=_ZFS_AC_META_GETVAL([Linux-Maximum]); + if test -n "$ZFS_META_KVER_MAX"; then + AC_DEFINE_UNQUOTED([ZFS_META_KVER_MAX], + ["$ZFS_META_KVER_MAX"], + [Define the maximum compatible kernel version.] + ) + AC_SUBST([ZFS_META_KVER_MAX]) + fi + m4_pattern_allow([^LT_(CURRENT|REVISION|AGE)$]) ZFS_META_LT_CURRENT=_ZFS_AC_META_GETVAL([LT_Current]); ZFS_META_LT_REVISION=_ZFS_AC_META_GETVAL([LT_Revision]); From 0be40959fe265ea6e89cc7d5b929d5fddb27f9a3 Mon Sep 17 00:00:00 2001 From: dacianstremtan <35844628+dacianstremtan@users.noreply.github.com> Date: Tue, 1 Oct 2019 15:54:27 -0400 Subject: [PATCH 233/325] Fix for zfs-dracut regression Line 31 and 32 overwrote the ${root} variable which broke mount-zfs.sh We have create a new variable for the dataset instead of overwriting the ${root} variable in zfs-load-key.sh${root} variable in zfs-load-key.sh Reviewed-by: Kash Pande Reviewed-by: Garrett Fields Reviewed-by: Brian Behlendorf Signed-off-by: Dacian Reece-Stremtan Closes #8913 Closes #9379 --- contrib/dracut/90zfs/zfs-load-key.sh.in | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/contrib/dracut/90zfs/zfs-load-key.sh.in b/contrib/dracut/90zfs/zfs-load-key.sh.in index 
9e7adfc7978..42dc1d08f3d 100755 --- a/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -25,22 +25,23 @@ while true; do done # run this after import as zfs-import-cache/scan service is confirmed good +# we do not overwrite the ${root} variable, but create a new one, BOOTFS, to hold the dataset if [ "${root}" = "zfs:AUTO" ] ; then - root="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')" + BOOTFS="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')" else - root="${root##zfs:}" - root="${root##ZFS=}" + BOOTFS="${root##zfs:}" + BOOTFS="${root##ZFS=}" fi # if pool encryption is active and the zfs command understands '-o encryption' -if [ "$(zpool list -H -o feature@encryption $(echo "${root}" | awk -F\/ '{print $1}'))" = 'active' ]; then +if [ "$(zpool list -H -o feature@encryption $(echo "${BOOTFS}" | awk -F\/ '{print $1}'))" = 'active' ]; then # if the root dataset has encryption enabled - ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${root}") + ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${BOOTFS}") if ! [ "${ENCRYPTIONROOT}" = "-" ]; then # decrypt them TRY_COUNT=5 while [ $TRY_COUNT -gt 0 ]; do - systemd-ask-password "Encrypted ZFS password for ${root}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break + systemd-ask-password "Encrypted ZFS password for ${BOOTFS}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break TRY_COUNT=$((TRY_COUNT - 1)) done fi From 5f67022bf7fd37d2be759413ee87e8c5f9f7cb46 Mon Sep 17 00:00:00 2001 From: Didier Roche Date: Wed, 2 Oct 2019 19:51:55 +0200 Subject: [PATCH 234/325] Workaround to avoid a race when /var/lib is a persistent dataset If /var/lib is a dataset not under /ROOT/, as proposed in the ubuntu root on zfs upstream guide (https://github.com/zfsonlinux/zfs/wiki/Ubuntu-18.04-Root-on-ZFS), we end up with a race where some services, like systemd-random-seed are writing under /var/lib, while zfs-mount is called. 
zfs mount will then potentially fail because of /var/lib isn't empty and so, can't be mounted. Order those 2 units for now (more may be needed) as we can't declare virtually a provide mount point to match "RequiresMountsFor=/var/lib/systemd/random-seed" from systemd-random-seed.service. The optional generator for zfs 0.8 fixes it, but it's not enabled by default nor necessarily required. Example: - rpool/ROOT/ubuntu (mountpoint = /) - rpool/var/ (mountpoint = /var) - rpool/var/lib (mountpoint = /var/lib) Both zfs-mount.service and systemd-random-seed.service are starting After=systemd-remount-fs.service. zfs-mount.service should be done before local-fs.target while systemd-random-seed.service should finish before sysinit.target (which is a later target). Ideally, we would have a way for zfs mount -a unit to declare all paths or move systemd-random-seed after local-fs.target. Reviewed-by: Antonio Russo Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Didier Roche Closes #9360 --- etc/systemd/system/zfs-mount.service.in | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/systemd/system/zfs-mount.service.in b/etc/systemd/system/zfs-mount.service.in index a18691a4680..6507c0765d0 100644 --- a/etc/systemd/system/zfs-mount.service.in +++ b/etc/systemd/system/zfs-mount.service.in @@ -6,6 +6,7 @@ After=systemd-udev-settle.service After=zfs-import.target After=systemd-remount-fs.service Before=local-fs.target +Before=systemd-random-seed.service [Service] Type=oneshot From 05e2a4cfc9eaece89971417c70ff92abf768258d Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 3 Oct 2019 09:39:13 -0700 Subject: [PATCH 235/325] ZTS: Fix upgrade_readonly_pool Update cleanup_upgrade to use destroy_dataset and destroy_pool when performing cleanup. These wrappers retry if the pool is busy preventing occasional failures like those observed when running tests upgrade_readonly_pool. 
For example: SUCCESS: test enabled == enabled User accounting upgrade is not executed on readonly pool NOTE: Performing local cleanup via log_onexit (cleanup_upgrade) cannot destroy 'testpool': pool is busy ERROR: zpool destroy testpool exited 1 Reviewed-by: Ryan Moeller Reviewed-by: John Kennedy Reviewed-by: Igor Kozhukhov Signed-off-by: Brian Behlendorf Closes #9400 --- .../tests/functional/upgrade/upgrade_common.kshlib | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib b/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib index 679ff304923..6ffd85b5b1b 100644 --- a/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib +++ b/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib @@ -34,8 +34,8 @@ export TMPDEV=$TEST_BASE_DIR/zpool_upgrade_test.dat function cleanup_upgrade { - datasetexists $TESTPOOL/fs1 && log_must zfs destroy $TESTPOOL/fs1 - datasetexists $TESTPOOL/fs2 && log_must zfs destroy $TESTPOOL/fs2 - datasetexists $TESTPOOL/fs3 && log_must zfs destroy $TESTPOOL/fs3 - datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL + destroy_dataset "$TESTPOOL/fs1" + destroy_dataset "$TESTPOOL/fs2" + destroy_dataset "$TESTPOOL/fs3" + destroy_pool "$TESTPOOL" } From b43893de86103c7e8fc6c1df46950a312852dd49 Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Thu, 3 Oct 2019 15:54:29 -0700 Subject: [PATCH 236/325] Rename rangelock_ functions to zfs_rangelock_ A rangelock KPI already exists on FreeBSD. Add a zfs_ prefix as per our convention to prevent any conflict with existing symbols. 
Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: Matt Macy Closes #9402 --- include/sys/zfs_rlock.h | 10 +++--- module/zfs/zfs_rlock.c | 70 +++++++++++++++++++++-------------------- module/zfs/zfs_vnops.c | 34 ++++++++++---------- module/zfs/zfs_znode.c | 28 ++++++++--------- module/zfs/zvol.c | 24 +++++++------- 5 files changed, 84 insertions(+), 82 deletions(-) diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index 05b080843d7..5f1e2a364e4 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -66,13 +66,13 @@ typedef struct locked_range { uint8_t lr_read_wanted; /* reader wants to lock this range */ } locked_range_t; -void rangelock_init(rangelock_t *, rangelock_cb_t *, void *); -void rangelock_fini(rangelock_t *); +void zfs_rangelock_init(rangelock_t *, rangelock_cb_t *, void *); +void zfs_rangelock_fini(rangelock_t *); -locked_range_t *rangelock_enter(rangelock_t *, +locked_range_t *zfs_rangelock_enter(rangelock_t *, uint64_t, uint64_t, rangelock_type_t); -void rangelock_exit(locked_range_t *); -void rangelock_reduce(locked_range_t *, uint64_t, uint64_t); +void zfs_rangelock_exit(locked_range_t *); +void zfs_rangelock_reduce(locked_range_t *, uint64_t, uint64_t); #ifdef __cplusplus } diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index d514a4fc775..94203a40c58 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -104,7 +104,7 @@ * Locks are ordered on the start offset of the range. */ static int -rangelock_compare(const void *arg1, const void *arg2) +zfs_rangelock_compare(const void *arg1, const void *arg2) { const locked_range_t *rl1 = (const locked_range_t *)arg1; const locked_range_t *rl2 = (const locked_range_t *)arg2; @@ -118,17 +118,17 @@ rangelock_compare(const void *arg1, const void *arg2) * and may increase the range that's locked for RL_WRITER. 
*/ void -rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg) +zfs_rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg) { mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL); - avl_create(&rl->rl_tree, rangelock_compare, + avl_create(&rl->rl_tree, zfs_rangelock_compare, sizeof (locked_range_t), offsetof(locked_range_t, lr_node)); rl->rl_cb = cb; rl->rl_arg = arg; } void -rangelock_fini(rangelock_t *rl) +zfs_rangelock_fini(rangelock_t *rl) { mutex_destroy(&rl->rl_lock); avl_destroy(&rl->rl_tree); @@ -138,7 +138,7 @@ rangelock_fini(rangelock_t *rl) * Check if a write lock can be grabbed, or wait and recheck until available. */ static void -rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) +zfs_rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) { avl_tree_t *tree = &rl->rl_tree; locked_range_t *lr; @@ -209,7 +209,7 @@ rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) * a proxy and return the proxy. */ static locked_range_t * -rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) +zfs_rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) { locked_range_t *proxy; @@ -241,7 +241,7 @@ rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) * returning the *front* proxy. */ static locked_range_t * -rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) +zfs_rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) { ASSERT3U(lr->lr_length, >, 1); ASSERT3U(off, >, lr->lr_offset); @@ -259,7 +259,7 @@ rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) rear->lr_write_wanted = B_FALSE; rear->lr_read_wanted = B_FALSE; - locked_range_t *front = rangelock_proxify(tree, lr); + locked_range_t *front = zfs_rangelock_proxify(tree, lr); front->lr_length = off - lr->lr_offset; avl_insert_here(tree, rear, front, AVL_AFTER); @@ -270,7 +270,7 @@ rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) * Create and add a new proxy range lock for the supplied range. 
*/ static void -rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) +zfs_rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) { ASSERT(len != 0); locked_range_t *lr = kmem_alloc(sizeof (locked_range_t), KM_SLEEP); @@ -285,7 +285,7 @@ rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) } static void -rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, +zfs_rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, locked_range_t *prev, avl_index_t where) { locked_range_t *next; @@ -307,7 +307,7 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, * convert to proxy if needed then * split this entry and bump ref count */ - prev = rangelock_split(tree, prev, off); + prev = zfs_rangelock_split(tree, prev, off); prev = AVL_NEXT(tree, prev); /* move to rear range */ } } @@ -326,7 +326,7 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, if (off < next->lr_offset) { /* Add a proxy for initial range before the overlap */ - rangelock_new_proxy(tree, off, next->lr_offset - off); + zfs_rangelock_new_proxy(tree, off, next->lr_offset - off); } new->lr_count = 0; /* will use proxies in tree */ @@ -344,30 +344,30 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, /* there's a gap */ ASSERT3U(next->lr_offset, >, prev->lr_offset + prev->lr_length); - rangelock_new_proxy(tree, + zfs_rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length, next->lr_offset - (prev->lr_offset + prev->lr_length)); } if (off + len == next->lr_offset + next->lr_length) { /* exact overlap with end */ - next = rangelock_proxify(tree, next); + next = zfs_rangelock_proxify(tree, next); next->lr_count++; return; } if (off + len < next->lr_offset + next->lr_length) { /* new range ends in the middle of this block */ - next = rangelock_split(tree, next, off + len); + next = zfs_rangelock_split(tree, next, off + len); next->lr_count++; return; } ASSERT3U(off + len, >, next->lr_offset + next->lr_length); - next = 
rangelock_proxify(tree, next); + next = zfs_rangelock_proxify(tree, next); next->lr_count++; } /* Add the remaining end range. */ - rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length, + zfs_rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length, (off + len) - (prev->lr_offset + prev->lr_length)); } @@ -375,7 +375,7 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, * Check if a reader lock can be grabbed, or wait and recheck until available. */ static void -rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) +zfs_rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) { avl_tree_t *tree = &rl->rl_tree; locked_range_t *prev, *next; @@ -437,7 +437,7 @@ rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) * Add the read lock, which may involve splitting existing * locks and bumping ref counts (r_count). */ - rangelock_add_reader(tree, new, prev, where); + zfs_rangelock_add_reader(tree, new, prev, where); } /* @@ -448,7 +448,7 @@ rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) * entire file is locked as RL_WRITER). */ locked_range_t * -rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, +zfs_rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, rangelock_type_t type) { ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); @@ -473,9 +473,11 @@ rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, if (avl_numnodes(&rl->rl_tree) == 0) avl_add(&rl->rl_tree, new); else - rangelock_enter_reader(rl, new); - } else - rangelock_enter_writer(rl, new); /* RL_WRITER or RL_APPEND */ + zfs_rangelock_enter_reader(rl, new); + } else { + /* RL_WRITER or RL_APPEND */ + zfs_rangelock_enter_writer(rl, new); + } mutex_exit(&rl->rl_lock); return (new); } @@ -484,7 +486,7 @@ rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, * Safely free the locked_range_t. 
*/ static void -rangelock_free(locked_range_t *lr) +zfs_rangelock_free(locked_range_t *lr) { if (lr->lr_write_wanted) cv_destroy(&lr->lr_write_cv); @@ -499,7 +501,7 @@ rangelock_free(locked_range_t *lr) * Unlock a reader lock */ static void -rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, +zfs_rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, list_t *free_list) { avl_tree_t *tree = &rl->rl_tree; @@ -561,7 +563,7 @@ rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, * Unlock range and destroy range lock structure. */ void -rangelock_exit(locked_range_t *lr) +zfs_rangelock_exit(locked_range_t *lr) { rangelock_t *rl = lr->lr_rangelock; list_t free_list; @@ -592,12 +594,12 @@ rangelock_exit(locked_range_t *lr) * lock may be shared, let rangelock_exit_reader() * release the lock and free the locked_range_t. */ - rangelock_exit_reader(rl, lr, &free_list); + zfs_rangelock_exit_reader(rl, lr, &free_list); } mutex_exit(&rl->rl_lock); while ((free_lr = list_remove_head(&free_list)) != NULL) - rangelock_free(free_lr); + zfs_rangelock_free(free_lr); list_destroy(&free_list); } @@ -608,7 +610,7 @@ rangelock_exit(locked_range_t *lr) * entry in the tree. 
*/ void -rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) +zfs_rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) { rangelock_t *rl = lr->lr_rangelock; @@ -631,9 +633,9 @@ rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) } #if defined(_KERNEL) -EXPORT_SYMBOL(rangelock_init); -EXPORT_SYMBOL(rangelock_fini); -EXPORT_SYMBOL(rangelock_enter); -EXPORT_SYMBOL(rangelock_exit); -EXPORT_SYMBOL(rangelock_reduce); +EXPORT_SYMBOL(zfs_rangelock_init); +EXPORT_SYMBOL(zfs_rangelock_fini); +EXPORT_SYMBOL(zfs_rangelock_enter); +EXPORT_SYMBOL(zfs_rangelock_exit); +EXPORT_SYMBOL(zfs_rangelock_reduce); #endif diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index de7b59935e8..03a8c4a50b0 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -485,7 +485,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) /* * Lock the range against changes. */ - locked_range_t *lr = rangelock_enter(&zp->z_rangelock, + locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, uio->uio_loffset, uio->uio_resid, RL_READER); /* @@ -558,7 +558,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); task_io_account_read(nread); out: - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (error); @@ -672,7 +672,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ - lr = rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); woff = lr->lr_offset; if (lr->lr_length == UINT64_MAX) { /* @@ -689,11 +689,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * this write, then this range lock will lock the entire file * so that we can re-write the block safely. 
*/ - lr = rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); } if (woff >= limit) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (SET_ERROR(EFBIG)); } @@ -811,7 +811,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) new_blksz = MIN(end_size, max_blksz); } zfs_grow_blocksize(zp, new_blksz, tx); - rangelock_reduce(lr, woff, n); + zfs_rangelock_reduce(lr, woff, n); } /* @@ -950,7 +950,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) } zfs_inode_update(zp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); /* * If we're in replay mode, or we made no progress, return error. @@ -1003,7 +1003,7 @@ zfs_get_done(zgd_t *zgd, int error) if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); /* * Release the vnode asynchronously as we currently have the @@ -1064,7 +1064,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = rangelock_enter(&zp->z_rangelock, + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { @@ -1086,12 +1086,12 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) size = zp->z_blksz; blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; offset -= blkoff; - zgd->zgd_lr = rangelock_enter(&zp->z_rangelock, + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, size, RL_READER); if (zp->z_blksz == size) break; offset += blkoff; - rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) @@ -4517,14 +4517,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) redirty_page_for_writepage(wbc, pp); unlock_page(pp); - locked_range_t *lr = rangelock_enter(&zp->z_rangelock, + locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, pgoff, pglen, RL_WRITER); lock_page(pp); /* Page mapping changed or it was no longer dirty, we're done */ if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) { unlock_page(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (0); } @@ -4532,7 +4532,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) /* Another process started write block if required */ if (PageWriteback(pp)) { unlock_page(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); if (wbc->sync_mode != WB_SYNC_NONE) { if (PageWriteback(pp)) @@ -4546,7 +4546,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) /* Clear the dirty flag the required locks are held */ if (!clear_page_dirty_for_io(pp)) { unlock_page(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (0); } @@ -4573,7 +4573,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) __set_page_dirty_nobuffers(pp); ClearPageError(pp); end_page_writeback(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (err); } @@ -4600,7 +4600,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) zfs_putpage_commit_cb, pp); dmu_tx_commit(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); if (wbc->sync_mode != 
WB_SYNC_NONE) { /* diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 234e134904a..7a72d953a2a 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -129,7 +129,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); - rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); + zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); zp->z_dirlocks = NULL; zp->z_acl_cached = NULL; @@ -151,7 +151,7 @@ zfs_znode_cache_destructor(void *buf, void *arg) rw_destroy(&zp->z_name_lock); mutex_destroy(&zp->z_acl_lock); rw_destroy(&zp->z_xattr_lock); - rangelock_fini(&zp->z_rangelock); + zfs_rangelock_fini(&zp->z_rangelock); ASSERT(zp->z_dirlocks == NULL); ASSERT(zp->z_acl_cached == NULL); @@ -1474,13 +1474,13 @@ zfs_extend(znode_t *zp, uint64_t end) /* * We will change zp_size, lock the whole file. */ - lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end <= zp->z_size) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1510,7 +1510,7 @@ zfs_extend(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } @@ -1522,7 +1522,7 @@ zfs_extend(znode_t *zp, uint64_t end) VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); - rangelock_exit(lr); + zfs_rangelock_exit(lr); dmu_tx_commit(tx); @@ -1591,13 +1591,13 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) /* * Lock the range being freed. */ - lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. 
*/ if (off >= zp->z_size) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } @@ -1647,7 +1647,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) page_len); } } - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } @@ -1673,20 +1673,20 @@ zfs_trunc(znode_t *zp, uint64_t end) /* * We will change zp_size, lock the whole file. */ - lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end >= zp->z_size) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, DMU_OBJECT_END); if (error) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1696,7 +1696,7 @@ zfs_trunc(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } @@ -1712,7 +1712,7 @@ zfs_trunc(znode_t *zp, uint64_t end) VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 840b8d008ec..93719dcca5f 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -783,7 +783,7 @@ zvol_write(void *arg) if (error) break; } - rangelock_exit(zvr->lr); + zfs_rangelock_exit(zvr->lr); int64_t nwritten = start_resid - uio.uio_resid; dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); @@ -878,7 +878,7 @@ zvol_discard(void *arg) ZVOL_OBJ, start, size); } unlock: - rangelock_exit(zvr->lr); + zfs_rangelock_exit(zvr->lr); if (error == 0 && sync) zil_commit(zv->zv_zilog, ZVOL_OBJ); @@ -924,7 +924,7 @@ zvol_read(void *arg) break; } } - rangelock_exit(zvr->lr); + zfs_rangelock_exit(zvr->lr); int64_t nread = start_resid - uio.uio_resid; dataset_kstats_update_read_kstats(&zv->zv_kstat, 
nread); @@ -944,7 +944,7 @@ zvol_get_done(zgd_t *zgd, int error) if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); kmem_free(zgd, sizeof (zgd_t)); } @@ -977,8 +977,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = rangelock_enter(&zv->zv_rangelock, offset, size, - RL_READER); + zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, + size, RL_READER); error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf, DMU_READ_NO_PREFETCH); } else { /* indirect write */ @@ -990,8 +990,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) */ size = zv->zv_volblocksize; offset = P2ALIGN_TYPED(offset, size, uint64_t); - zgd->zgd_lr = rangelock_enter(&zv->zv_rangelock, offset, size, - RL_READER); + zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, + size, RL_READER); error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { @@ -1089,7 +1089,7 @@ zvol_request(struct request_queue *q, struct bio *bio) * are asynchronous, we take it here synchronously to make * sure overlapped I/Os are properly ordered. 
*/ - zvr->lr = rangelock_enter(&zv->zv_rangelock, offset, size, + zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size, RL_WRITER); /* * Sync writes and discards execute zil_commit() which may need @@ -1128,7 +1128,7 @@ zvol_request(struct request_queue *q, struct bio *bio) rw_enter(&zv->zv_suspend_lock, RW_READER); - zvr->lr = rangelock_enter(&zv->zv_rangelock, offset, size, + zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size, RL_READER); if (zvol_request_sync || taskq_dispatch(zvol_taskq, zvol_read, zvr, TQ_SLEEP) == TASKQID_INVALID) @@ -1725,7 +1725,7 @@ zvol_alloc(dev_t dev, const char *name) zv->zv_open_count = 0; strlcpy(zv->zv_name, name, MAXNAMELEN); - rangelock_init(&zv->zv_rangelock, NULL, NULL); + zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL); rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL); zv->zv_disk->major = zvol_major; @@ -1783,7 +1783,7 @@ zvol_free(void *arg) ASSERT(zv->zv_disk->private_data == NULL); rw_destroy(&zv->zv_suspend_lock); - rangelock_fini(&zv->zv_rangelock); + zfs_rangelock_fini(&zv->zv_rangelock); del_gendisk(zv->zv_disk); blk_cleanup_queue(zv->zv_queue); From 5a1bf9e8b17f8414d7bd30a833d4d2a26a1851a6 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 4 Oct 2019 12:30:51 -0700 Subject: [PATCH 237/325] Fix automount for root filesystems Commit 093bb64 resolved an automount failures for chroot'd processes but inadvertently broke automounting for root filesystems where the vfs_mntpoint is NULL. Resolve the issue by checking for NULL in order to generate the correct path. Reviewed-by: Tom Caputi Signed-off-by: Brian Behlendorf Closes #9381 Closes #9384 --- module/zfs/zfs_ctldir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 1e61ef06d00..3b2a6eb8273 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -1053,7 +1053,8 @@ zfsctl_snapshot_mount(struct path *path, int flags) * on mount.zfs(8). 
*/ snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", - zfsvfs->z_vfs->vfs_mntpoint, dname(dentry)); + zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", + dname(dentry)); /* * Multiple concurrent automounts of a snapshot are never allowed. From 5e78137f28c0dada02f96a46c0e28905316c7e7b Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 4 Oct 2019 12:38:07 -0700 Subject: [PATCH 238/325] ZTS: Fix trim/trim_config and trim/autotrim_config There have been occasional CI failures which occur when the trimmed vdev size exactly matches the target size. Resolve this by slightly relaxing the conditional and checking for -ge rather than -gt. In all of the cases observer, the values match exactly. For example: Failure /mnt/trim-vdev1 is 768 MB which is not -gt than 768 MB Reviewed-by: Ryan Moeller Signed-off-by: Brian Behlendorf Closes #9399 --- tests/zfs-tests/tests/functional/trim/autotrim_config.ksh | 2 +- tests/zfs-tests/tests/functional/trim/trim_config.ksh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index 6ce396a3801..e41e325687d 100755 --- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -89,7 +89,7 @@ for type in "" "mirror" "raidz2"; do # Fill the pool, verify the vdevs are no longer sparse. file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R - verify_vdevs "-gt" "$VDEV_MAX_MB" $VDEVS + verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS # Remove the file, wait for trim, verify the vdevs are now sparse. 
log_must rm /$TESTPOOL/file diff --git a/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/tests/zfs-tests/tests/functional/trim/trim_config.ksh index e56bd6248f2..993072b1084 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_config.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_config.ksh @@ -88,7 +88,7 @@ for type in "" "mirror" "raidz2"; do # Fill the pool, verify the vdevs are no longer sparse. file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R - verify_vdevs "-gt" "$VDEV_MAX_MB" $VDEVS + verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS # Remove the file, issue trim, verify the vdevs are now sparse. log_must rm /$TESTPOOL/file From 8139355dceeface1a72c25ea368235f642a6f290 Mon Sep 17 00:00:00 2001 From: George Melikov Date: Tue, 8 Oct 2019 20:10:23 +0300 Subject: [PATCH 239/325] module/Makefile.in: don't run xargs if empty If stdin if empty - don't run xargs command, otherwise we can get `cp: missing file operand` error. Reviewed-by: Ryan Moeller Reviewed-by: Brian Behlendorf Signed-off-by: George Melikov Closes #9418 --- module/Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/Makefile.in b/module/Makefile.in index 7477dbe5650..ea8b8340d8b 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -68,7 +68,7 @@ distdir: list='$(obj-m)'; for objdir in $$list; do \ (cd @top_srcdir@/module && find $$objdir \ -name '*.c' -o -name '*.h' -o -name '*.S' | \ - xargs cp --parents -t @abs_top_builddir@/module/$$distdir); \ + xargs -r cp --parents -t @abs_top_builddir@/module/$$distdir); \ done distclean maintainer-clean: clean From 8af362c3e9c04d030b995b4f8779b7680a7af397 Mon Sep 17 00:00:00 2001 From: Igor K Date: Tue, 8 Oct 2019 23:40:17 +0300 Subject: [PATCH 240/325] ZTS: Fix mmp_hostid test Correctly use the `mntpnt_fs` variable, and include additional logic to ensure the /etc/hostid is correct set up and cleaned up. 
Signed-off-by: Brian Behlendorf Signed-off-by: Igor Kozhukhov Closes #9349 --- tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh index b492b1070ca..e3c6e34f4bc 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh @@ -43,6 +43,9 @@ function cleanup log_must rm $MMP_DIR/file.{0,1,2,3,4,5} log_must rmdir $MMP_DIR log_must mmp_clear_hostid + if [[ -L $HOSTID_FILE ]]; then + rm -f $HOSTID_FILE + fi } log_assert "Verify hostid file can reside on a ZFS dataset" @@ -64,10 +67,11 @@ log_must mv $HOSTID_FILE $mntpnt_etc/hostid # 3. Create a file so the pool will have some contents log_must zfs create $MMP_POOL/fs mntpnt_fs=$(get_prop mountpoint $MMP_POOL/fs) -log_must mkfile 1M $fs_mntpnt/file +log_must mkfile 1M $mntpnt_fs/file # 4. Verify multihost cannot be enabled until the /etc/hostid is linked log_mustnot zpool set multihost=on $MMP_POOL +log_mustnot ls -l $HOSTID_FILE log_must ln -s $mntpnt_etc/hostid $HOSTID_FILE log_must zpool set multihost=on $MMP_POOL From 90bc5ca5e14bd182e66abdc51c4d822238b509bf Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 10 Oct 2019 09:49:45 -0700 Subject: [PATCH 241/325] Update `zfs program` command usage Update the zfs(8) man page to clearly describe that arguments for channel programs are to be listed after the -- sentinel which terminates argument processing. This behavior is supported by getopt on Linux, FreeBSD, and Illumos according to each platforms respective man pages. zfs program [-jn] [-t instruction-limit] [-m memory-limit] pool script [--] arg1 ... 
Reviewed-by: Clint Armstrong Reviewed-by: George Melikov Reviewed-by: loli10K Signed-off-by: Brian Behlendorf Closes #9056 Closes #9428 --- man/man8/zfs.8 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 8d7b0bbb6fb..229b0ba69bf 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -305,7 +305,8 @@ .Op Fl t Ar instruction-limit .Op Fl m Ar memory-limit .Ar pool script -.Op Ar arg1 No ... +.Op -- +.Ar arg1 No ... .Nm .Cm load-key .Op Fl nr @@ -4469,7 +4470,8 @@ Display the path's inode change time as the first column of output. .Op Fl t Ar instruction-limit .Op Fl m Ar memory-limit .Ar pool script -.Op Ar arg1 No ... +.Op -- +.Ar arg1 No ... .Xc Executes .Ar script From f1ba5478a3c140e9062e902bf9a4e4c29662d50f Mon Sep 17 00:00:00 2001 From: loli10K Date: Fri, 11 Oct 2019 01:39:41 +0200 Subject: [PATCH 242/325] Fix pool creation with feature@allocation_classes disabled When "feature@allocation_classes" is not enabled on the pool no vdev with "special" or "dedup" allocation type should be allowed to exist in the vdev tree. 
Reviewed-by: Pavel Zakharov Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #9427 Closes #9429 --- include/zfs_comutil.h | 1 + module/zcommon/zfs_comutil.c | 28 +++++++++++++++++++ module/zfs/spa.c | 10 +++++++ .../alloc_class/alloc_class_001_pos.ksh | 6 +++- 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/include/zfs_comutil.h b/include/zfs_comutil.h index 1360d6e1c17..7cdc6d6938a 100644 --- a/include/zfs_comutil.h +++ b/include/zfs_comutil.h @@ -34,6 +34,7 @@ extern "C" { #endif extern boolean_t zfs_allocatable_devs(nvlist_t *); +extern boolean_t zfs_special_devs(nvlist_t *); extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *); extern int zfs_zpl_version_map(int spa_version); diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c index 5daa6907c5d..a3ff7d8e699 100644 --- a/module/zcommon/zfs_comutil.c +++ b/module/zcommon/zfs_comutil.c @@ -64,6 +64,33 @@ zfs_allocatable_devs(nvlist_t *nv) return (B_FALSE); } +/* + * Are there special vdevs? 
+ */ +boolean_t +zfs_special_devs(nvlist_t *nv) +{ + char *bias; + uint_t c; + nvlist_t **child; + uint_t children; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + return (B_FALSE); + } + for (c = 0; c < children; c++) { + if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS, + &bias) == 0) { + if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0 || + strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0) { + return (B_TRUE); + } + } + } + return (B_FALSE); +} + void zpool_get_load_policy(nvlist_t *nvl, zpool_load_policy_t *zlpp) { @@ -223,6 +250,7 @@ zfs_dataset_name_hidden(const char *name) #if defined(_KERNEL) EXPORT_SYMBOL(zfs_allocatable_devs); +EXPORT_SYMBOL(zfs_special_devs); EXPORT_SYMBOL(zpool_get_load_policy); EXPORT_SYMBOL(zfs_zpl_version_map); EXPORT_SYMBOL(zfs_spa_version_map); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7a20330c187..7fa18cbd1de 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5134,6 +5134,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, uint64_t version, obj; boolean_t has_features; boolean_t has_encryption; + boolean_t has_allocclass; spa_feature_t feat; char *feat_name; char *poolname; @@ -5178,6 +5179,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, has_features = B_FALSE; has_encryption = B_FALSE; + has_allocclass = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { if (zpool_prop_feature(nvpair_name(elem))) { @@ -5187,6 +5189,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, VERIFY0(zfeature_lookup_name(feat_name, &feat)); if (feat == SPA_FEATURE_ENCRYPTION) has_encryption = B_TRUE; + if (feat == SPA_FEATURE_ALLOCATION_CLASSES) + has_allocclass = B_TRUE; } } @@ -5200,6 +5204,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, return (error); } } + if (!has_allocclass && zfs_special_devs(nvroot)) { + spa_deactivate(spa); + 
spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (ENOTSUP); + } if (has_features || nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh index 441df829671..3237d7cb784 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh @@ -20,7 +20,8 @@ # # DESCRIPTION: -# Creating a pool with a special device succeeds. +# Creating a pool with a special device succeeds, but only if +# "feature@allocation_classes" is enabled. # verify_runnable "global" @@ -31,6 +32,9 @@ log_assert $claim log_onexit cleanup log_must disk_setup +for type in special dedup; do + log_mustnot zpool create -d $TESTPOOL $CLASS_DISK0 $type $CLASS_DISK1 +done log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ $CLASS_DISK0 $CLASS_DISK1 log_must display_status "$TESTPOOL" From 33cd5f29978616072b4f81ae6066a6fa4be1a353 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Fri, 11 Oct 2019 12:49:48 -0400 Subject: [PATCH 243/325] Fix some style nits in tests Mostly whitespace changes, no functional changes intended. 
Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Ryan Moeller Closes #9447 --- .../tests/functional/cli_root/zfs/zfs_002_pos.ksh | 2 +- .../functional/cli_root/zfs_send/zfs_send_006_pos.ksh | 2 +- .../cli_root/zfs_unmount/zfs_unmount_008_neg.ksh | 11 +++++------ .../cli_root/zpool_create/zpool_create.shlib | 2 +- .../tests/functional/history/history_001_pos.ksh | 2 +- .../tests/functional/history/history_common.kshlib | 2 +- .../tests/functional/inheritance/inherit_001_pos.ksh | 11 +++++------ 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh index b21b6c657df..92382aa11d6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh @@ -62,7 +62,7 @@ log_assert "With ZFS_ABORT set, all zfs commands can abort and generate a " \ "core file." log_onexit cleanup -#preparation work for testing +# Preparation work for testing corepath=$TESTDIR/core if [[ -d $corepath ]]; then rm -rf $corepath diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh index 7192551b6c5..652f7b738f0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh @@ -54,7 +54,7 @@ function get_estimate_size typeset snapshot=$1 typeset option=$2 typeset base_snapshot=${3:-""} - if [[ -z $3 ]];then + if [[ -z $3 ]]; then typeset total_size=$(zfs send $option $snapshot 2>&1 | tail -1) else typeset total_size=$(zfs send $option $base_snapshot $snapshot \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh index e83e8d5165e..afec9d89629 100755 
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh @@ -95,15 +95,14 @@ for arg in ${badargs[@]}; do log_mustnot eval "zfs unmount $arg $fs >/dev/null 2>&1" done - -#Testing invalid datasets +# Testing invalid datasets for ds in $snap $vol "blah"; do for opt in "" "-f"; do log_mustnot eval "zfs unmount $opt $ds >/dev/null 2>&1" done done -#Testing invalid mountpoint +# Testing invalid mountpoint dir=foodir.$$ file=foo.$$ fs1=$TESTPOOL/fs.$$ @@ -119,20 +118,20 @@ for mpt in "./$dir" "./$file" "/tmp"; do done cd $curpath -#Testing null argument and too many arguments +# Testing null argument and too many arguments for opt in "" "-f"; do log_mustnot eval "zfs unmount $opt >/dev/null 2>&1" log_mustnot eval "zfs unmount $opt $fs $fs1 >/dev/null 2>&1" done -#Testing already unmounted filesystem +# Testing already unmounted filesystem log_must zfs unmount $fs1 for opt in "" "-f"; do log_mustnot eval "zfs unmount $opt $fs1 >/dev/null 2>&1" log_mustnot eval "zfs unmount /tmp/$dir >/dev/null 2>&1" done -#Testing legacy mounted filesystem +# Testing legacy mounted filesystem log_must zfs set mountpoint=legacy $fs1 if is_linux; then log_must mount -t zfs $fs1 /tmp/$dir diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib index 3f3f4472990..31244f4ecb8 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib @@ -45,7 +45,7 @@ function create_pool_test typeset vdevs eval "typeset -a diskarray=($3)" - for vdevs in "${diskarray[@]}";do + for vdevs in "${diskarray[@]}"; do create_pool $pool $keywd $vdevs log_must poolexists $pool destroy_pool $pool diff --git a/tests/zfs-tests/tests/functional/history/history_001_pos.ksh 
b/tests/zfs-tests/tests/functional/history/history_001_pos.ksh index e22aaa33dbc..f33265185d5 100755 --- a/tests/zfs-tests/tests/functional/history/history_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_001_pos.ksh @@ -115,7 +115,7 @@ import_dir=$TEST_BASE_DIR/import_dir.$$ log_must mkdir $import_dir log_must cp $STF_SUITE/tests/functional/history/zfs-pool-v4.dat.Z $import_dir log_must uncompress $import_dir/zfs-pool-v4.dat.Z -upgrade_pool=$(zpool import -d $import_dir | grep "pool:" | awk '{print $2}') +upgrade_pool=$(zpool import -d $import_dir | awk '/pool:/ { print $2 }') log_must zpool import -d $import_dir $upgrade_pool run_and_verify -p "$upgrade_pool" "zpool upgrade $upgrade_pool" diff --git a/tests/zfs-tests/tests/functional/history/history_common.kshlib b/tests/zfs-tests/tests/functional/history/history_common.kshlib index d97e015fcfe..b82c60cbb69 100644 --- a/tests/zfs-tests/tests/functional/history/history_common.kshlib +++ b/tests/zfs-tests/tests/functional/history/history_common.kshlib @@ -110,7 +110,7 @@ function verify_long fi typeset suffix="" - if [ is_linux ]; then + if is_linux; then suffix=":linux" fi diff --git a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh index 76bd05ce57d..b1c24fa3a74 100755 --- a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh @@ -401,18 +401,17 @@ set -A local_val "off" "on" "off" \ # # Add system specific values # - -if ! 
is_linux; then +if is_linux; then + prop+=("acltype" "") + def_val+=("off") + local_val+=("off") +else prop+=("aclmode" "" \ "mountpoint" "") def_val+=("discard" \ "") local_val+=("groupmask" \ "$TESTDIR") -else - prop+=("acltype" "") - def_val+=("off") - local_val+=("off") fi From c99b304f01947154c94c59290ad832ebdadc95ab Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Fri, 11 Oct 2019 12:50:46 -0400 Subject: [PATCH 244/325] Clarify loop variable name in zfs copies test Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Ryan Moeller Closes #9445 --- .../functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh index 5946bf59679..4a3ef76de76 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh @@ -70,8 +70,8 @@ if [[ ! -d $mntp ]]; then mkdir -p $mntp fi -for val in 1 2 3; do - do_vol_test $NEWFS_DEFAULT_FS $val $mntp +for copies in 1 2 3; do + do_vol_test $NEWFS_DEFAULT_FS $copies $mntp done log_pass "The volume space used by multiple copies is charged correctly as expected. " From e416b165ffed67ec14ac07404b803122aefa2e9b Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Wed, 9 Oct 2019 12:16:12 -0700 Subject: [PATCH 245/325] Implement ZPOOL_IMPORT_UDEV_TIMEOUT_MS Since 0.7.0, zpool import would unconditionally block on udev for 30 seconds. This introduced a regression in initramfs environments that lack udev (particularly mdev based environments), yet use a zfs userland tools intended for the system that had been built against udev. 
Gentoo's genkernel is the main example, although custom user initramfs environments would be similarly impacted unless special builds of the ZFS userland utilities were done for them. Such environments already have their own mechanisms for blocking until device nodes are ready (such as genkernel's scandelay parameter), so it is unnecessary for zpool import to block on a non-existent udev until a timeout is reached inside of them. Rather than trying to intelligently determine whether udev is available on the system to avoid unnecessarily blocking in such environments, it seems best to just allow the environment to override the timeout. I propose that we add an environment variable called ZPOOL_IMPORT_UDEV_TIMEOUT_MS. Setting it to 0 would restore the 0.6.x behavior that was more desirable in mdev based initramfs environments. This allows the system user land utilities to be reused when building mdev-based initramfs archives. Reviewed-by: Igor Kozhukhov Reviewed-by: Jorgen Lundman Reviewed-by: Brian Behlendorf Reviewed-by: Georgy Yakovlev Signed-off-by: Richard Yao Closes #9436 --- lib/libzutil/zutil_import.c | 10 +++++++++- man/man8/zpool.8 | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 28733cc747c..e84680a7976 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1651,17 +1651,25 @@ zpool_open_func(void *arg) if (rn->rn_labelpaths) { char *path = NULL; char *devid = NULL; + char *env = NULL; rdsk_node_t *slice; avl_index_t where; + int timeout; int error; if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) return; + env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); + if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || + timeout < 0) { + timeout = DISK_LABEL_WAIT; + } + /* * Allow devlinks to stabilize so all paths are available. 
*/ - zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT); + zpool_label_disk_wait(rn->rn_name, timeout); if (path != NULL) { slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index bdad81149b8..adbb723aae7 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -2740,6 +2740,12 @@ Similar to the option in .Nm zpool import . .El +.Bl -tag -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS" +.It Ev ZPOOL_IMPORT_UDEV_TIMEOUT_MS +The maximum time in milliseconds that +.Nm zpool import +will wait for an expected device to be available. +.El .Bl -tag -width "ZPOOL_VDEV_NAME_GUID" .It Ev ZPOOL_VDEV_NAME_GUID Cause From c54ee4c0d35c272d7c35f7f4fda34d920dcdfa00 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 11 Oct 2019 10:22:20 -0700 Subject: [PATCH 246/325] ZTS: Fix zpool_status_-s After commit 5e74ac51 which split and reordered the run files the `zpool_status_-s` test began failing. The new ordering placed the test after a previous test which used `zpool replace` to replace a disk but did not clear its label. This resulted in the next test, `zpool_status_-s`, failing because of the potentially active pool being detected on the replaced vdev. /dev/loop0 is part of potentially active pool 'testpool' Use the default_mirror_setup_noexit() and default_cleanup_noexit() functions to create the pool in `zpool_status_-s`. They use the -f flag by default. In the `scrub_after_resilver` test wipe the label during cleanup to prevent future failures if the tests are again reordered. 
Reviewed-by: Igor Kozhukhov Reviewed-by: Ryan Moeller Reviewed-by: John Kennedy Signed-off-by: Brian Behlendorf Closes #9451 --- .../zfs-tests/tests/functional/fault/scrub_after_resilver.ksh | 1 + tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh b/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh index a5b58ec8ff2..db4a4ad55ef 100755 --- a/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh +++ b/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh @@ -42,6 +42,7 @@ function cleanup # Restore our zed.rc log_must zed_rc_restore $zedrc_backup default_cleanup_noexit + log_must zpool labelclear -f $DISK1 } log_onexit cleanup diff --git a/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh b/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh index b6a3e71fdfa..c919ae60851 100755 --- a/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh +++ b/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh @@ -41,14 +41,14 @@ DISK=${DISKS%% *} verify_runnable "both" -log_must zpool create $TESTPOOL mirror ${DISKS} +default_mirror_setup_noexit $DISKS function cleanup { log_must zinject -c all log_must set_tunable64 zio_slow_io_ms $OLD_SLOW_IO log_must set_tunable64 zfs_slow_io_events_per_second $OLD_SLOW_IO_EVENTS - log_must destroy_pool $TESTPOOL + default_cleanup_noexit } log_onexit cleanup From e08b98e9839bb27447a7b174c4bec93e9bc02e7d Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Sun, 13 Oct 2019 19:13:26 -0700 Subject: [PATCH 247/325] Modify sharenfs=on default behavior While it may sometimes be convenient to export an NFS filesystem with no_root_squash it should not be the default behavior. Align the default behavior with the Linux NFS server defaults. To restore the previous behavior use 'zfs set sharenfs="no_root_squash,..."'. 
Reviewed-by: loli10K Reviewed-by: Richard Laager Signed-off-by: Brian Behlendorf Closes #9397 Closes #9425 --- lib/libshare/nfs.c | 5 +++-- man/man8/zfs.8 | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/libshare/nfs.c b/lib/libshare/nfs.c index 5c8976e15aa..7cc5ae43f50 100644 --- a/lib/libshare/nfs.c +++ b/lib/libshare/nfs.c @@ -387,9 +387,10 @@ get_linux_shareopts(const char *shareopts, char **plinux_opts) *plinux_opts = NULL; - /* default options for Solaris shares */ + /* no_subtree_check - Default as of nfs-utils v1.1.0 */ (void) add_linux_shareopt(plinux_opts, "no_subtree_check", NULL); - (void) add_linux_shareopt(plinux_opts, "no_root_squash", NULL); + + /* mountpoint - Restrict exports to ZFS mountpoints */ (void) add_linux_shareopt(plinux_opts, "mountpoint", NULL); rc = foreach_nfs_shareopt(shareopts, get_linux_shareopts_cb, diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 229b0ba69bf..1572ac98265 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -1977,7 +1977,7 @@ If the property is set to .Sy on , the dataset is shared using the default options: .Pp -.Em sec=sys,rw,crossmnt,no_subtree_check,no_root_squash +.Em sec=sys,rw,crossmnt,no_subtree_check .Pp See .Xr exports 5 From 8c8f84472b4c7b70ed4d2d6a179a6b2b7b137d17 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 18 Oct 2019 10:24:28 -0700 Subject: [PATCH 248/325] Update skc_obj_alloc for spl kmem caches that are backed by Linux Currently, for certain sizes and classes of allocations we use SPL caches that are backed by caches in the Linux Slab allocator to reduce fragmentation and increase utilization of memory. The way things are implemented for these caches as of now though is that we don't keep any statistics of the allocations that we make from these caches. This patch enables the tracking of allocated objects in those SPL caches by making the trade-off of grabbing the cache lock at every object allocation and free to update the respective counter. 
Additionally, this patch makes those caches visible in the /proc/spl/kmem/slab special file. As a side note, enabling the specific counter for those caches enables SDB to create a more user-friendly interface than /proc/spl/kmem/slab that can also cross-reference data from slabinfo. Here is for example the output of one of those caches in SDB that outputs the name of the underlying Linux cache, the memory of SPL objects allocated in that cache, and the percentage of those objects compared to all the objects in it: ``` > spl_kmem_caches | filter obj.skc_name == "zio_buf_512" | pp name ... source total_memory util ----------- ... ----------------- ------------ ---- zio_buf_512 ... kmalloc-512[SLUB] 16.9MB 8 ``` Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Serapheim Dimitropoulos Closes #9474 --- module/spl/spl-kmem-cache.c | 14 ++++++++++++++ module/spl/spl-proc.c | 28 ++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index b39867b0374..ff3a2a9de09 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -1453,6 +1453,17 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) obj = kmem_cache_alloc(slc, kmem_flags_convert(flags)); } while ((obj == NULL) && !(flags & KM_NOSLEEP)); + if (obj != NULL) { + /* + * Even though we leave everything up to the + * underlying cache we still keep track of + * how many objects we've allocated in it for + * better debuggability. 
+ */ + spin_lock(&skc->skc_lock); + skc->skc_obj_alloc++; + spin_unlock(&skc->skc_lock); + } goto ret; } @@ -1526,6 +1537,9 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) */ if (skc->skc_flags & KMC_SLAB) { kmem_cache_free(skc->skc_linux_cache, obj); + spin_lock(&skc->skc_lock); + skc->skc_obj_alloc--; + spin_unlock(&skc->skc_lock); return; } diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c index a75bcc2145b..13eaa6301d7 100644 --- a/module/spl/spl-proc.c +++ b/module/spl/spl-proc.c @@ -437,11 +437,29 @@ slab_seq_show(struct seq_file *f, void *p) ASSERT(skc->skc_magic == SKC_MAGIC); - /* - * Backed by Linux slab see /proc/slabinfo. - */ - if (skc->skc_flags & KMC_SLAB) + if (skc->skc_flags & KMC_SLAB) { + /* + * This cache is backed by a generic Linux kmem cache which + * has its own accounting. For these caches we only track + * the number of active allocated objects that exist within + * the underlying Linux slabs. For the overall statistics of + * the underlying Linux cache please refer to /proc/slabinfo. 
+ */ + spin_lock(&skc->skc_lock); + seq_printf(f, "%-36s ", skc->skc_name); + seq_printf(f, "0x%05lx %9s %9lu %8s %8u " + "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n", + (long unsigned)skc->skc_flags, + "-", + (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc), + "-", + (unsigned)skc->skc_obj_size, + "-", "-", "-", "-", + (long unsigned)skc->skc_obj_alloc, + "-", "-", "-", "-"); + spin_unlock(&skc->skc_lock); return (0); + } spin_lock(&skc->skc_lock); seq_printf(f, "%-36s ", skc->skc_name); @@ -461,9 +479,7 @@ slab_seq_show(struct seq_file *f, void *p) (long unsigned)skc->skc_obj_deadlock, (long unsigned)skc->skc_obj_emergency, (long unsigned)skc->skc_obj_emergency_max); - spin_unlock(&skc->skc_lock); - return (0); } From c19a6512fdb7ecda6724a7ae85dabfc9e81eabbd Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 18 Oct 2019 10:25:44 -0700 Subject: [PATCH 249/325] Name anonymous enum of KMC_BIT constants Giving a name to this enum makes it discoverable from debugging tools like DRGN and SDB. For example, with the name proposed on this patch we can iterate over these values in DRGN: ``` >>> prog.type('enum kmc_bit').enumerators (('KMC_BIT_NOTOUCH', 0), ('KMC_BIT_NODEBUG', 1), ('KMC_BIT_NOMAGAZINE', 2), ('KMC_BIT_NOHASH', 3), ('KMC_BIT_QCACHE', 4), ('KMC_BIT_KMEM', 5), ('KMC_BIT_VMEM', 6), ('KMC_BIT_SLAB', 7), ... ``` This enables SDB to easily pretty-print the flags of the spl_kmem_caches in the system like this: ``` > spl_kmem_caches -o "name,flags,total_memory" name flags total_memory ------------------------ ----------------------- ------------ abd_t KMC_NOMAGAZINE|KMC_SLAB 4.5MB arc_buf_hdr_t_full KMC_NOMAGAZINE|KMC_SLAB 12.3MB ... ... ddt_cache KMC_VMEM 583.7KB ddt_entry_cache KMC_NOMAGAZINE|KMC_SLAB 0.0B ... ... zio_buf_1048576 KMC_NODEBUG|KMC_VMEM 0.0B ... ... 
``` Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Serapheim Dimitropoulos Closes #9478 --- include/spl/sys/kmem_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/spl/sys/kmem_cache.h b/include/spl/sys/kmem_cache.h index 4ee7bcae07e..8381b03d8ec 100644 --- a/include/spl/sys/kmem_cache.h +++ b/include/spl/sys/kmem_cache.h @@ -35,7 +35,7 @@ * size. This slab implementation also supports both constructors and * destructors which the Linux slab does not. */ -enum { +typedef enum kmc_bit { KMC_BIT_NOTOUCH = 0, /* Don't update ages */ KMC_BIT_NODEBUG = 1, /* Default behavior */ KMC_BIT_NOMAGAZINE = 2, /* XXX: Unsupported */ @@ -52,7 +52,7 @@ enum { KMC_BIT_TOTAL = 18, /* Proc handler helper bit */ KMC_BIT_ALLOC = 19, /* Proc handler helper bit */ KMC_BIT_MAX = 20, /* Proc handler helper bit */ -}; +} kmc_bit_t; /* kmem move callback return values */ typedef enum kmem_cbrc { From 601dd2a5042edb962422f4a92f360e7303208c15 Mon Sep 17 00:00:00 2001 From: John Wren Kennedy Date: Fri, 18 Oct 2019 19:27:02 +0200 Subject: [PATCH 250/325] ZTS: Written props test fails with 4k disks With 4k disks, this test will fail in the last section because the expected human readable value of 20.0M is reported as 20.1M. Rather than use the human readable property, switch to the parsable property and verify that the values are reasonably close. 
Reviewed-by: Igor Kozhukhov Reviewed-by: Ryan Moeller Reviewed-by: Tony Hutter Signed-off-by: John Kennedy Closes #9477 --- .../zfs_property/zfs_written_property_001_pos.ksh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh index 9a2d3cb8025..cbbacace1ec 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh @@ -11,7 +11,7 @@ # # -# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright (c) 2012, 2017 by Delphix. All rights reserved. # # @@ -216,15 +216,15 @@ for ds in $datasets; do count=$blocks sync_pool done -recursive_output=$(zfs get -r written@current $TESTPOOL | \ +recursive_output=$(zfs get -p -r written@current $TESTPOOL | \ grep -v $TESTFS1@ | grep -v $TESTFS2@ | grep -v $TESTFS3@ | \ grep -v "VALUE" | grep -v "-") -expected="20.0M" +expected="$((20 * mb_block))" for ds in $datasets; do writtenat=$(echo "$recursive_output" | grep -v $ds/) writtenat=$(echo "$writtenat" | grep $ds | awk '{print $3}') - [[ $writtenat == $expected ]] || \ - log_fail "recursive written property output mismatch" + within_percent $writtenat $expected 99.5 || \ + log_fail "Unexpected written@ value on $ds" done log_pass "zfs written and written@ property fields print correct values" From 09015c212f555134f988d4d0245555081a25a959 Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Sun, 20 Oct 2019 20:37:30 -0700 Subject: [PATCH 251/325] Use correct format string when printing int8 Reviewed-by: Igor Kozhukhov Reviewed-by: Ryan Moeller Reviewed-by: George Melikov Reviewed-by: Brian Behlendorf Signed-off-by: Matt Macy Closes #9486 --- lib/libnvpair/libnvpair_json.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/lib/libnvpair/libnvpair_json.c b/lib/libnvpair/libnvpair_json.c index 0b403f1af35..37a392391fb 100644 --- a/lib/libnvpair/libnvpair_json.c +++ b/lib/libnvpair/libnvpair_json.c @@ -303,7 +303,7 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) for (i = 0; i < valsz; i++) { if (i > 0) FPRINTF(fp, ","); - FPRINTF(fp, "%hd", val[i]); + FPRINTF(fp, "%hhd", val[i]); } FPRINTF(fp, "]"); break; From 635bf1c37cf99faa20411911c52fe81b48b9a053 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Thu, 24 Oct 2019 13:47:47 -0400 Subject: [PATCH 252/325] ZTS: Consistency pass for .ksh extensions * Use .ksh extension for ksh scripts, not .sh * Remove .ksh extension from tests in common.run Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #9502 --- tests/runfiles/linux.run | 6 +++--- .../tests/functional/cli_root/zfs_mount/Makefile.am | 2 +- .../{zfs_mount_test_race.sh => zfs_mount_test_race.ksh} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename tests/zfs-tests/tests/functional/cli_root/zfs_mount/{zfs_mount_test_race.sh => zfs_mount_test_race.ksh} (100%) diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 04ec2936bb3..8169ad57ffa 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -267,8 +267,8 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', tags = ['functional', 'cli_root', 'zfs_snapshot'] [tests/functional/cli_root/zfs_sysfs] -tests = ['zfeature_set_unsupported.ksh', 'zfs_get_unsupported', - 'zfs_set_unsupported', 'zfs_sysfs_live.ksh', 'zpool_get_unsupported', +tests = ['zfeature_set_unsupported', 'zfs_get_unsupported', + 'zfs_set_unsupported', 'zfs_sysfs_live', 'zpool_get_unsupported', 'zpool_set_unsupported'] tags = ['functional', 'cli_root', 'zfs_sysfs'] @@ -639,7 +639,7 @@ tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count', tags = ['functional', 'limits'] [tests/functional/link_count] -tests = ['link_count_001', 'link_count_root_inode.ksh'] +tests = ['link_count_001', 
'link_count_root_inode'] tags = ['functional', 'link_count'] [tests/functional/migration] diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am index c208a1c378d..8a137b8303c 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am @@ -19,7 +19,7 @@ dist_pkgdata_SCRIPTS = \ zfs_mount_all_mountpoints.ksh \ zfs_mount_encrypted.ksh \ zfs_mount_remount.ksh \ - zfs_mount_test_race.sh \ + zfs_mount_test_race.ksh \ zfs_multi_mount.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh rename to tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh From 635603a1c2a4ee510671e422f4ccef22cf695320 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 24 Oct 2019 13:51:01 -0400 Subject: [PATCH 253/325] Fix incremental recursive encrypted receive Currently, incremental recursive encrypted receives fail to work for any snapshot after the first. The reason for this is because the check in zfs_setup_cmdline_props() did not properly realize that when the user attempts to use '-x encryption' in this situation, they are not really overriding the existing encryption property and instead are attempting to prevent it from changing. This resulted in an error message stating: "encryption property 'encryption' cannot be set or excluded for raw or incremental streams". This problem is fixed by updating the logic to expect this use case. 
Reviewed-by: loli10K Reviewed-by: Brian Behlendorf Reviewed-by: Igor Kozhukhov Signed-off-by: Tom Caputi Closes #9494 --- lib/libzfs/libzfs_sendrecv.c | 22 ++++++++++++++----- .../functional/rsend/send_encrypted_props.ksh | 18 ++++++++++++++- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 20a59ef6cff..a9323579877 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -3659,11 +3659,21 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, /* raw streams can't override encryption properties */ if ((zfs_prop_encryption_key_param(prop) || - prop == ZFS_PROP_ENCRYPTION) && (raw || !newfs)) { + prop == ZFS_PROP_ENCRYPTION) && raw) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "encryption property '%s' cannot " - "be set or excluded for raw or incremental " - "streams."), name); + "be set or excluded for raw streams."), name); + ret = zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + /* incremental streams can only exclude encryption properties */ + if ((zfs_prop_encryption_key_param(prop) || + prop == ZFS_PROP_ENCRYPTION) && !newfs && + nvpair_type(nvp) != DATA_TYPE_BOOLEAN) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption property '%s' cannot " + "be set for incremental streams."), name); ret = zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } @@ -3681,10 +3691,12 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, */ if (nvlist_exists(origprops, name)) { nvlist_t *attrs; + char *source = NULL; attrs = fnvlist_lookup_nvlist(origprops, name); - if (strcmp(fnvlist_lookup_string(attrs, - ZPROP_SOURCE), ZPROP_SOURCE_VAL_RECVD) != 0) + if (nvlist_lookup_string(attrs, + ZPROP_SOURCE, &source) == 0 && + strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0) continue; } /* diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh index 
4c90ba95bf9..8e21acd99d2 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh @@ -58,7 +58,8 @@ log_assert "'zfs recv' must properly handle encryption properties" typeset keyfile=/$TESTPOOL/pkey typeset sendfile=/$TESTPOOL/sendfile -typeset snap=$TESTPOOL/ds@snap +typeset snap=$TESTPOOL/ds@snap1 +typeset snap2=$TESTPOOL/ds@snap2 typeset esnap=$TESTPOOL/crypt@snap1 typeset esnap2=$TESTPOOL/crypt@snap2 @@ -78,6 +79,7 @@ log_must cp /$TESTPOOL/ds/$TESTFILE0 /$TESTPOOL/crypt/$TESTFILE0 typeset cksum=$(md5digest /$TESTPOOL/ds/$TESTFILE0) log_must zfs snap -r $snap +log_must zfs snap -r $snap2 log_must zfs snap -r $esnap log_must zfs snap -r $esnap2 @@ -193,6 +195,20 @@ recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds +# Test that we can override an unencrypted, incremental, recursive stream's +# encryption settings, receiving all datasets as encrypted children. 
+log_note "Must be able to receive recursive stream to encrypted child" +ds=$TESTPOOL/crypt/recv +log_must eval "zfs send -R $snap2 > $sendfile" +log_must eval "zfs recv -x encryption $ds < $sendfile" +log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" +log_must test "$(get_prop 'mounted' $ds)" == "yes" +recv_cksum=$(md5digest /$ds/$TESTFILE0) +log_must test "$recv_cksum" == "$cksum" +log_must zfs destroy -r $ds + # Check that we haven't printed the key to the zpool history log log_mustnot eval "zpool history -i | grep -q 'wkeydata'" From c6eaa8b7f98d790f69d5bb7f74133f4306eb7c16 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Mon, 28 Oct 2019 09:49:44 -0700 Subject: [PATCH 254/325] Fix zpool history unbounded memory usage In original implementation, zpool history will read the whole history before printing anything, causing memory usage goes unbounded. We fix this by breaking it into read-print iterations. Reviewed-by: Tom Caputi Reviewed-by: Matt Ahrens Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: Chunwei Chen Closes #9516 --- cmd/zpool/zpool_main.c | 44 +++++++++++++++++++++++++--------------- include/libzfs.h | 3 ++- lib/libzfs/libzfs_pool.c | 20 ++++++++++-------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 2c5c88e694b..3b7aaa0e73a 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -8419,24 +8419,12 @@ typedef struct hist_cbdata { boolean_t internal; } hist_cbdata_t; -/* - * Print out the command history for a specific pool. 
- */ -static int -get_history_one(zpool_handle_t *zhp, void *data) +static void +print_history_records(nvlist_t *nvhis, hist_cbdata_t *cb) { - nvlist_t *nvhis; nvlist_t **records; uint_t numrecords; - int ret, i; - hist_cbdata_t *cb = (hist_cbdata_t *)data; - - cb->first = B_FALSE; - - (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); - - if ((ret = zpool_get_history(zhp, &nvhis)) != 0) - return (ret); + int i; verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); @@ -8540,8 +8528,32 @@ get_history_one(zpool_handle_t *zhp, void *data) (void) printf("]"); (void) printf("\n"); } +} + +/* + * Print out the command history for a specific pool. + */ +static int +get_history_one(zpool_handle_t *zhp, void *data) +{ + nvlist_t *nvhis; + int ret; + hist_cbdata_t *cb = (hist_cbdata_t *)data; + uint64_t off = 0; + boolean_t eof = B_FALSE; + + cb->first = B_FALSE; + + (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); + + while (!eof) { + if ((ret = zpool_get_history(zhp, &nvhis, &off, &eof)) != 0) + return (ret); + + print_history_records(nvhis, cb); + nvlist_free(nvhis); + } (void) printf("\n"); - nvlist_free(nvhis); return (ret); } diff --git a/include/libzfs.h b/include/libzfs.h index fed4eda0074..19c16b86a7a 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -425,7 +425,8 @@ typedef enum { extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, int name_flags); extern int zpool_upgrade(zpool_handle_t *, uint64_t); -extern int zpool_get_history(zpool_handle_t *, nvlist_t **); +extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *, + boolean_t *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, unsigned, int); extern int zpool_events_clear(libzfs_handle_t *, int *); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 29e6f0fd23d..1bd2b44bed8 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -4305,33 
+4305,37 @@ get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) * Retrieve the command history of a pool. */ int -zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) +zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off, + boolean_t *eof) { char *buf; int buflen = 128 * 1024; - uint64_t off = 0; nvlist_t **records = NULL; uint_t numrecords = 0; int err, i; + uint64_t start = *off; buf = malloc(buflen); if (buf == NULL) return (ENOMEM); - do { + /* process about 1MB a time */ + while (*off - start < 1024 * 1024) { uint64_t bytes_read = buflen; uint64_t leftover; - if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0) + if ((err = get_history(zhp, buf, off, &bytes_read)) != 0) break; /* if nothing else was read in, we're at EOF, just return */ - if (!bytes_read) + if (!bytes_read) { + *eof = B_TRUE; break; + } if ((err = zpool_history_unpack(buf, bytes_read, &leftover, &records, &numrecords)) != 0) break; - off -= leftover; + *off -= leftover; if (leftover == bytes_read) { /* * no progress made, because buffer is not big enough @@ -4343,9 +4347,7 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) if (buf == NULL) return (ENOMEM); } - - /* CONSTCOND */ - } while (1); + } free(buf); From 7e1b772eddf062660a09d2ba6f802022ca93dc90 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Wed, 30 Oct 2019 14:27:28 -0400 Subject: [PATCH 255/325] Fix 'zfs change-key' with unencrypted child Currently, when you call 'zfs change-key' on an encrypted dataset that has an unencrypted child, the code will trigger a VERIFY. This VERIFY is leftover from before we allowed unencrypted datasets to exist underneath encrypted ones. This patch fixes the issue by simply replacing the VERIFY with an early return when recursing through datasets. 
Reviewed by: Jason King Reviewed-by: Brian Behlendorf Reviewed-by: Igor Kozhukhov Signed-off-by: Tom Caputi Closes #9524 --- module/zfs/dsl_crypt.c | 8 ++++++-- .../zfs_change-key/zfs_change-key_child.ksh | 19 ++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 1545af53af7..162a3613c28 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1430,6 +1430,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, uint64_t new_rddobj, dsl_wrapping_key_t *wkey, boolean_t skip, dmu_tx_t *tx) { + int ret; zap_cursor_t *zc; zap_attribute_t *za; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1448,12 +1449,15 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, return; } + ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); + VERIFY(ret == 0 || ret == ENOENT); + /* * Stop recursing if this dsl dir didn't inherit from the root * or if this dd is a clone. */ - VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj)); - if (!skip && (curr_rddobj != rddobj || dsl_dir_is_clone(dd))) { + if (ret == ENOENT || + (!skip && (curr_rddobj != rddobj || dsl_dir_is_clone(dd)))) { dsl_dir_rele(dd, FTAG); return; } diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh index dda7c1df433..a886ab8a779 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh @@ -28,13 +28,15 @@ # STRATEGY: # 1. Create an encrypted dataset # 2. Create an encrypted child dataset -# 3. Attempt to change the key without any flags -# 4. Attempt to change the key specifying keylocation -# 5. Attempt to change the key specifying keyformat -# 6. Verify the new encryption root can unload and load its key -# 7. Recreate the child dataset -# 8. 
Attempt to change the key specifying both the keylocation and keyformat -# 9. Verify the new encryption root can unload and load its key +# 3. Create an unencrypted child dataset +# 4. Attempt to change the key without any flags +# 5. Attempt to change the key specifying keylocation +# 6. Attempt to change the key specifying keyformat +# 7. Verify the new encryption root can unload and load its key +# 8. Recreate the child dataset +# 9. Attempt to change the key specifying both the keylocation and keyformat +# 10. Verify the new encryption root can unload and load its key +# 11. Verify the unencrytped child is still accessible normally # verify_runnable "both" @@ -53,6 +55,7 @@ log_assert "'zfs change-key' should promote an encrypted child to an" \ log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" log_must zfs create $TESTPOOL/$TESTFS1/child +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/child2 log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \ "$TESTPOOL/$TESTFS1/child" @@ -82,5 +85,7 @@ log_must key_unavailable $TESTPOOL/$TESTFS1/child log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" log_must key_available $TESTPOOL/$TESTFS1/child +log_must zfs unmount $TESTPOOL/$TESTFS1/child2 +log_must zfs mount $TESTPOOL/$TESTFS1/child2 log_pass "'zfs change-key' promotes an encrypted child to an encryption root" From 123aa2fc14c649d894783996bcadf7812c0996e7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 30 Oct 2019 12:37:49 -0700 Subject: [PATCH 256/325] Fix contrib/zcp/Makefile.am Remove the stray leading + from the Makefile. This was preventing the autosnap.lua channel program from being properly included by `make dist`. 
Reviewed-by: Giuseppe Di Natale Signed-off-by: Brian Behlendorf Closes #9527 --- contrib/zcp/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/zcp/Makefile.am b/contrib/zcp/Makefile.am index 54d65f891e3..e6a777ad7ba 100644 --- a/contrib/zcp/Makefile.am +++ b/contrib/zcp/Makefile.am @@ -1 +1 @@ -+EXTRA_DIST = autosnap.lua +EXTRA_DIST = autosnap.lua From 5187a14f54d9ddf72405d05e5f953151f787c3c1 Mon Sep 17 00:00:00 2001 From: alaviss Date: Wed, 30 Oct 2019 21:38:41 +0000 Subject: [PATCH 257/325] dracut/zfs-load-key.sh: properly remove prefixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the 'ZFS=' prefix from $BOOTFS instead of $root. This makes sure that the 'zfs:' prefix remains stripped so that users with 'root=zfs:dataset' cmdline can have key loaded on boot again. Reviewed-by: Garrett Fields Reviewed-by: Dacian Reece-Stremtan Reviewed-by: Brian Behlendorf Signed-off-by: Hiếu Lê Closes #9520 --- contrib/dracut/90zfs/zfs-load-key.sh.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/dracut/90zfs/zfs-load-key.sh.in b/contrib/dracut/90zfs/zfs-load-key.sh.in index 42dc1d08f3d..3f466798e72 100755 --- a/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -30,7 +30,7 @@ if [ "${root}" = "zfs:AUTO" ] ; then BOOTFS="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')" else BOOTFS="${root##zfs:}" - BOOTFS="${root##ZFS=}" + BOOTFS="${BOOTFS##ZFS=}" fi # if pool encryption is active and the zfs command understands '-o encryption' From ca0f9b74733e74bcb17fe305ce9c484cd42d83c3 Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Thu, 31 Oct 2019 10:09:01 -0700 Subject: [PATCH 258/325] Include prototypes for vdev_initialize Address two prototype related warnings emitted by clang. 
Reviewed-by: Brian Behlendorf Reviewed-by: Igor Kozhukhov Signed-off-by: Matt Macy Closes #9535 --- module/zfs/vdev_initialize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c index 803d97c297c..9958a295832 100644 --- a/module/zfs/vdev_initialize.c +++ b/module/zfs/vdev_initialize.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * Value that is written to disk during initialization. @@ -415,7 +416,7 @@ vdev_initialize_load(vdev_t *vd) * Convert the logical range into a physical range and add it to our * avl tree. */ -void +static void vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size) { vdev_t *vd = arg; From 1253fcc70ac91d43c382edce4b06633466aa6f3f Mon Sep 17 00:00:00 2001 From: "M. Zhou" <5723047+cdluminate@users.noreply.github.com> Date: Wed, 6 Nov 2019 19:36:33 +0000 Subject: [PATCH 259/325] Add a notice in /etc/defaults/zfs for systemd users Some systemd users may want to change configurations in /etc/defaults/zfs, but these settings won't affect systemd services. Reviewed-by: Brian Behlendorf Signed-off-by: Mo Zhou Closes #9544 --- etc/init.d/zfs.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/etc/init.d/zfs.in b/etc/init.d/zfs.in index d4ad1beaaa2..42fc1161c72 100644 --- a/etc/init.d/zfs.in +++ b/etc/init.d/zfs.in @@ -1,5 +1,11 @@ # ZoL userland configuration. +# NOTE: This file is intended for sysv init and initramfs. +# Changing some of these settings may not make any difference on +# systemd-based setup, e.g. setting ZFS_MOUNT=no will not prevent systemd +# from launching zfs-mount.service during boot. +# See: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=901436 + # To enable a boolean setting, set it to yes, on, true, or 1. # Anything else will be interpreted as unset. 
From 618206c0b922a1e27a942036b893bfb98570e244 Mon Sep 17 00:00:00 2001 From: Witaut Bajaryn Date: Fri, 8 Nov 2019 23:34:07 +0100 Subject: [PATCH 260/325] Skip loading already loaded key Don't ask for the password / try to load the key if the key for the encryptionroot is already loaded. The user might have loaded the key manually or by other means before the scripts get called. Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Reviewed-by: Richard Laager Signed-off-by: Witaut Bajaryn Closes #9495 Closes #9529 --- contrib/dracut/90zfs/mount-zfs.sh.in | 14 +++++++++----- contrib/dracut/90zfs/zfs-load-key.sh.in | 3 +++ contrib/initramfs/scripts/zfs.in | 3 +++ .../system-generators/zfs-mount-generator.in | 2 ++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/contrib/dracut/90zfs/mount-zfs.sh.in b/contrib/dracut/90zfs/mount-zfs.sh.in index 23f7e3e295e..73300a9b663 100755 --- a/contrib/dracut/90zfs/mount-zfs.sh.in +++ b/contrib/dracut/90zfs/mount-zfs.sh.in @@ -62,11 +62,15 @@ if import_pool "${ZFS_POOL}" ; then # if the root dataset has encryption enabled ENCRYPTIONROOT="$(zfs get -H -o value encryptionroot "${ZFS_DATASET}")" if ! [ "${ENCRYPTIONROOT}" = "-" ]; then - # decrypt them - ask_for_password \ - --tries 5 \ - --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}: " \ - --cmd "zfs load-key '${ENCRYPTIONROOT}'" + KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")" + # if the key needs to be loaded + if [ "$KEYSTATUS" = "unavailable" ]; then + # decrypt them + ask_for_password \ + --tries 5 \ + --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}: " \ + --cmd "zfs load-key '${ENCRYPTIONROOT}'" + fi fi fi # Let us tell the initrd to run on shutdown. 
diff --git a/contrib/dracut/90zfs/zfs-load-key.sh.in b/contrib/dracut/90zfs/zfs-load-key.sh.in index 3f466798e72..88f43b6edc6 100755 --- a/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -38,6 +38,9 @@ if [ "$(zpool list -H -o feature@encryption $(echo "${BOOTFS}" | awk -F\/ '{prin # if the root dataset has encryption enabled ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${BOOTFS}") if ! [ "${ENCRYPTIONROOT}" = "-" ]; then + KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")" + # continue only if the key needs to be loaded + [ "$KEYSTATUS" = "unavailable" ] || exit 0 # decrypt them TRY_COUNT=5 while [ $TRY_COUNT -gt 0 ]; do diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index c82b210d7e9..523694473a6 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -414,6 +414,9 @@ decrypt_fs() # If root dataset is encrypted... if ! [ "${ENCRYPTIONROOT}" = "-" ]; then + KEYSTATUS="$(${ZFS} get -H -o value keystatus "${ENCRYPTIONROOT}")" + # Continue only if the key needs to be loaded + [ "$KEYSTATUS" = "unavailable" ] || return 0 TRY_COUNT=3 # Prompt with plymouth, if active if [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 3e529cb67bb..be2c1420cd7 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -182,6 +182,8 @@ process_line() { keyloadcmd="@sbindir@/zfs load-key '${dataset}'" elif [ "${p_keyloc}" = "prompt" ] ; then keyloadcmd="sh -c 'set -eu;"\ +"keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";"\ +"[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;"\ "count=0;"\ "while [ \$\$count -lt 3 ];do"\ " systemd-ask-password --id=\"zfs:${dataset}\""\ From edaec8422507660f6b78587f7e4bf50976d11c64 Mon Sep 17 
00:00:00 2001 From: Alexander Motin Date: Mon, 11 Nov 2019 20:27:59 +0300 Subject: [PATCH 261/325] Improve logging of 128KB writes Before my ZIL space optimization few years ago 128KB writes were logged as two 64KB+ records in two 128KB log blocks. After that change it became ~127KB+/1KB+ in two 128KB log blocks to free space in the second block for another record. Unfortunately in case of 128KB only writes, when space in the second block remained unused, that change increased write latency by unbalancing checksum computation and write times between parallel threads. It also didn't help with SLOG space efficiency in that case. This change introduces new 68KB log block size, used for both writes below 67KB and 128KB-sharp writes. Writes of 68-127KB are still using one 128KB block to not increase processing overhead. Writes above 131KB are still using full 128KB blocks, since possible saving there is small. Mixed loads will likely also fall back to previous 128KB, since code uses maximum of the last 16 requested block sizes. Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #9409 --- module/zfs/zil.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 8411e333b18..c4d7d6ed1df 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1416,11 +1416,17 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) * aligned to 4KB) actually gets written. However, we can't always just * allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted. 
*/ -uint64_t zil_block_buckets[] = { - 4096, /* non TX_WRITE */ - 8192+4096, /* data base */ - 32*1024 + 4096, /* NFS writes */ - UINT64_MAX +struct { + uint64_t limit; + uint64_t blksz; +} zil_block_buckets[] = { + { 4096, 4096 }, /* non TX_WRITE */ + { 8192 + 4096, 8192 + 4096 }, /* database */ + { 32768 + 4096, 32768 + 4096 }, /* NFS writes */ + { 65536 + 4096, 65536 + 4096 }, /* 64KB writes */ + { 131072, 131072 }, /* < 128KB writes */ + { 131072 +4096, 65536 + 4096 }, /* 128KB writes */ + { UINT64_MAX, SPA_OLD_MAXBLOCKSIZE}, /* > 128KB writes */ }; /* @@ -1504,9 +1510,9 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) * pool log space. */ zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); - for (i = 0; zil_blksz > zil_block_buckets[i]; i++) + for (i = 0; zil_blksz > zil_block_buckets[i].limit; i++) continue; - zil_blksz = MIN(zil_block_buckets[i], zilog->zl_max_block_size); + zil_blksz = MIN(zil_block_buckets[i].blksz, zilog->zl_max_block_size); zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; for (i = 0; i < ZIL_PREV_BLKS; i++) zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); From 2c7549fb6fdaf8a60e9876b512e1f3e1a83b944f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 13 Nov 2019 09:23:14 -0800 Subject: [PATCH 262/325] Fix `zpool create -o ` error message When `zpool create -o ` is run without root permissions and the pool property requested is not specifically enumerated in zpool_valid_proplist(). Then an incorrect error message referring to an invalid property is printed rather than the expected permission denied error. Specifying a pool property at create time should be handled the same way as filesystem properties in zfs_valid_proplist(). There should not be default zfs_error_aux() set for properties which are not listed. 
Reviewed-by: loli10K Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9550 Closes #9568 --- lib/libzfs/libzfs_pool.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 1bd2b44bed8..d45b87ce652 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -734,8 +734,6 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, break; default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s'(%d) not defined"), propname, prop); break; } } From 1545f7c59d293e3ec5848b35f5c92b6e9a6e214a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Sun, 21 Jul 2019 19:34:02 +0200 Subject: [PATCH 263/325] Add missing documentation for some KMC flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Matt Ahrens Signed-off-by: Sebastian Gottschall Signed-off-by: Michael Niewöhner Closes #9034 --- module/spl/spl-kmem-cache.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index ff3a2a9de09..9acff541449 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -862,11 +862,11 @@ spl_magazine_destroy(spl_kmem_cache_t *skc) * KMC_VMEM Force SPL vmem backed cache * KMC_SLAB Force Linux slab backed cache * KMC_OFFSLAB Locate objects off the slab - * KMC_NOTOUCH unsupported - * KMC_NODEBUG unsupported - * KMC_NOHASH unsupported - * KMC_QCACHE unsupported - * KMC_NOMAGAZINE unsupported + * KMC_NOTOUCH Disable cache object aging (unsupported) + * KMC_NODEBUG Disable debugging (unsupported) + * KMC_NOHASH Disable hashing (unsupported) + * KMC_QCACHE Disable qcache (unsupported) + * KMC_NOMAGAZINE Enabled for kmem/vmem, Disabled for Linux slab */ spl_kmem_cache_t * spl_kmem_cache_create(char *name, size_t size, size_t align, From 880a37aa357fa91b36b67cc2a579bc68ff258368 Mon Sep 17 
00:00:00 2001 From: loli10K Date: Wed, 13 Nov 2019 19:19:06 +0100 Subject: [PATCH 264/325] Prevent NULL pointer dereference in blkg_tryget() on EL8 kernels blkg_tryget() as shipped in EL8 kernels does not seem to handle NULL @blkg as input; this is different from its mainline counterpart where NULL is accepted. To prevent dereferencing a NULL pointer when dealing with block devices which do not set a root_blkg on the request queue perform the NULL check in vdev_bio_associate_blkg(). Reviewed-by: Brian Behlendorf Reviewed-by: Kjeld Schouten Reviewed-by: Tony Hutter Signed-off-by: loli10K Closes #9546 Closes #9577 --- module/zfs/vdev_disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 823a54c12e9..5786169bc2f 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -545,7 +545,7 @@ vdev_bio_associate_blkg(struct bio *bio) ASSERT3P(q, !=, NULL); ASSERT3P(bio->bi_blkg, ==, NULL); - if (blkg_tryget(q->root_blkg)) + if (q->root_blkg && blkg_tryget(q->root_blkg)) bio->bi_blkg = q->root_blkg; } #define bio_associate_blkg vdev_bio_associate_blkg From 6657800745f0d9b312323758cca16bcf88bc4518 Mon Sep 17 00:00:00 2001 From: Kjeld Schouten Date: Wed, 13 Nov 2019 19:23:23 +0100 Subject: [PATCH 265/325] Change zed.service to zfs-zed.service in man page zed.service does not exist replaced with correct service name in man. 
Reviewed-by: Brian Behlendorf Signed-off-by: Kjeld Schouten-Lebbing Closes #9581 --- man/man8/zfs-mount-generator.8.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in index 48e4e2dfac2..a696eb4617d 100644 --- a/man/man8/zfs-mount-generator.8.in +++ b/man/man8/zfs-mount-generator.8.in @@ -58,9 +58,9 @@ Then, enable the tracking ZEDLET: .RS 4 ln -s "@zfsexecdir@/zed.d/history_event-zfs-list-cacher.sh" "@sysconfdir@/zfs/zed.d" -systemctl enable zed.service +systemctl enable zfs-zed.service -systemctl restart zed.service +systemctl restart zfs-zed.service .RE .PP Force the running of the ZEDLET by setting canmount=on for at least one dataset in the pool: From ef0b539581cd385b08b11bfdad152b15eaf9aadd Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Fri, 15 Nov 2019 18:52:11 +0100 Subject: [PATCH 266/325] Remove inappropriate error message suggesting to use '-r' Removes an incorrect error message from libzfs that suggests applying '-r' when a zfs subcommand is called with a filesystem path while expecting either a snapshot or bookmark path.
Reviewed-by: Brian Behlendorf Reviewed-by: Kjeld Schouten Signed-off-by: InsanePrawn Closes #9574 --- lib/libzfs/libzfs_dataset.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index cc2f61a0d7d..7f33e244797 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -119,8 +119,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '@' delimiter in snapshot name, " - "did you mean to use -r?")); + "missing '@' delimiter in snapshot name")); return (0); } @@ -134,8 +133,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, if (type == ZFS_TYPE_BOOKMARK && strchr(path, '#') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '#' delimiter in bookmark name, " - "did you mean to use -r?")); + "missing '#' delimiter in bookmark name")); return (0); } From 20e124dd7151a4082ed0296e5ec872cf970fc555 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Fri, 15 Nov 2019 14:56:05 -0300 Subject: [PATCH 267/325] Break out of zfs_zget early if unlinked znode If zp->z_unlinked is set, we're working with a znode that has been marked for deletion. If that's the case, we can skip the "goto again" loop and return ENOENT, as the znode should not be discovered. 
Reviewed-by: Richard Yao Reviewed-by: Matt Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Heitor Alves de Siqueira Closes #9583 --- module/zfs/zfs_znode.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 7a72d953a2a..fa6072ade1f 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1093,6 +1093,9 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); /* + * If zp->z_unlinked is set, the znode is already marked + * for deletion and should not be discovered. + * * If igrab() returns NULL the VFS has independently * determined the inode should be evicted and has * called iput_final() to start the eviction process. @@ -1106,19 +1109,24 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * need to detect the active SA hold thereby informing * the VFS that this inode should not be evicted. */ - if (igrab(ZTOI(zp)) == NULL) { - mutex_exit(&zp->z_lock); - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zfsvfs, zh); - /* inode might need this to finish evict */ - cond_resched(); - goto again; + if (zp->z_unlinked) { + err = SET_ERROR(ENOENT); + } else if (igrab(ZTOI(zp)) == NULL) { + err = SET_ERROR(EAGAIN); + } else { + *zpp = zp; + err = 0; } - *zpp = zp; - err = 0; + mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); zfs_znode_hold_exit(zfsvfs, zh); + + if (err == EAGAIN) { + /* inode might need this to finish evict */ + cond_resched(); + goto again; + } return (err); } From 7191f049d55d71d942d186f59f4795a2cbd1a5ae Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Tue, 19 Nov 2019 01:44:28 +0100 Subject: [PATCH 268/325] Remove requirement for -d 1 for zfs list and zfs get with bookmarks df58307 removed the need to specify -d 1 when zfs list and zfs get are called with -t snapshot on a datset. This commit extends the same behaviour to -t bookmark. 
This commit also introduces the 'snap' shorthand for snapshots from zfs list to zfs get. Reviewed-by: Brian Behlendorf Reviewed-by: Tom Caputi Reviewed-by: Kjeld Schouten Signed-off-by: InsanePrawn Closes #9589 --- cmd/zfs/zfs_main.c | 23 ++++++++++--------- .../cli_root/zfs_get/zfs_get_009_pos.ksh | 11 +++++++++ .../cli_root/zfs_get/zfs_get_list_d.kshlib | 3 ++- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index cd8c0aca076..ced60a690e1 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -1882,7 +1882,7 @@ zfs_do_get(int argc, char **argv) flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", - "volume", "snapshot", "bookmark", + "volume", "snapshot", "snap", "bookmark", "all", NULL }; switch (getsubopt(&optarg, type_subopts, @@ -1894,12 +1894,13 @@ zfs_do_get(int argc, char **argv) types |= ZFS_TYPE_VOLUME; break; case 2: + case 3: types |= ZFS_TYPE_SNAPSHOT; break; - case 3: + case 4: types |= ZFS_TYPE_BOOKMARK; break; - case 4: + case 5: types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; break; @@ -1932,11 +1933,11 @@ zfs_do_get(int argc, char **argv) fields = argv[0]; /* - * Handle users who want to get all snapshots of the current - * dataset (ex. 'zfs get -t snapshot refer '). + * Handle users who want to get all snapshots or bookmarks + * of a dataset (ex. 'zfs get -t snapshot refer '). */ - if (types == ZFS_TYPE_SNAPSHOT && argc > 1 && - (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { + if ((types == ZFS_TYPE_SNAPSHOT || types == ZFS_TYPE_BOOKMARK) && + argc > 1 && (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { flags |= (ZFS_ITER_DEPTH_LIMIT | ZFS_ITER_RECURSE); limit = 1; } @@ -3436,11 +3437,11 @@ zfs_do_list(int argc, char **argv) types &= ~ZFS_TYPE_SNAPSHOT; /* - * Handle users who want to list all snapshots of the current - * dataset (ex. 'zfs list -t snapshot '). 
+ * Handle users who want to list all snapshots or bookmarks + * of the current dataset (ex. 'zfs list -t snapshot '). */ - if (types == ZFS_TYPE_SNAPSHOT && argc > 0 && - (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { + if ((types == ZFS_TYPE_SNAPSHOT || types == ZFS_TYPE_BOOKMARK) && + argc > 0 && (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { flags |= (ZFS_ITER_DEPTH_LIMIT | ZFS_ITER_RECURSE); limit = 1; } diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh index 2d97c5918ac..d4ebbb155ef 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh @@ -92,5 +92,16 @@ log_must eval "zfs get -H -t snapshot -o name creation $DEPTH_FS > $DEPTH_OUTPUT log_must eval "zfs get -H -t snapshot -d 1 -o name creation $DEPTH_FS > $EXPECT_OUTPUT" log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT +# Ensure 'zfs get -t snap' works as a shorthand for 'zfs get -t snapshot' +log_must eval "zfs get -H -t snap -d 1 -o name creation $DEPTH_FS > $DEPTH_OUTPUT" +log_must eval "zfs get -H -t snapshot -d 1 -o name creation $DEPTH_FS > $EXPECT_OUTPUT" +log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT + +# Ensure 'zfs get -t bookmark ' works as though -d 1 was specified +log_must eval "zfs get -H -t bookmark -o name creation $DEPTH_FS > $DEPTH_OUTPUT" +log_must eval "zfs get -H -t bookmark -d 1 -o name creation $DEPTH_FS > $EXPECT_OUTPUT" +log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT + + log_pass "'zfs get -d ' should get expected output." 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib index 8ef8d9aa160..48b3268f781 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib @@ -37,7 +37,7 @@ set -A depth_options "d 0" "d 1" "d 2" "d 4" "d 32" set -A depth_array 0 1 2 4 32 # -# Setup multiple depths datasets, including fs, volume and snapshot. +# Setup multiple depths datasets, including fs, volumes, snapshots and bookmarks. # function depth_fs_setup { @@ -65,6 +65,7 @@ function depth_fs_setup log_must zfs create -V 8M $fs/vol_"$j"_depth"$i" fi log_must zfs snapshot $fs@snap_"$j"_depth"$i" + log_must zfs bookmark $fs@snap_"$j"_depth"$i" '#bookmark'_"$j"_depth"$i" (( j=j+1 )) done done From e688774ea66d6203ccbe883fc69a9b86d9edf386 Mon Sep 17 00:00:00 2001 From: George Melikov Date: Wed, 20 Nov 2019 03:23:27 +0300 Subject: [PATCH 269/325] ZTS: Casenorm fix unicode interpretation Use `printf` to properly interpret unicode characters. Illumos uses a utility called `zlook` to allow additional flags to be provided to readdir and lookup for testing. This functionality could be ported to Linux, but even without it several of the tests can be enabled by instead using the standard `test` command. Additional work is required to enable the remaining test cases.
Reviewed-by: Igor Kozhukhov Reviewed-by: Brian Behlendorf Signed-off-by: George Melikov Issue #7633 Closes #8812 --- tests/test-runner/bin/zts-report.py | 8 -------- .../tests/functional/casenorm/casenorm.cfg | 16 ++++++++++------ .../tests/functional/casenorm/casenorm.kshlib | 12 ++++++++++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/test-runner/bin/zts-report.py b/tests/test-runner/bin/zts-report.py index c5c869045ed..600079fbee8 100755 --- a/tests/test-runner/bin/zts-report.py +++ b/tests/test-runner/bin/zts-report.py @@ -160,17 +160,9 @@ # reasons listed above can be used. # known = { - 'casenorm/sensitive_none_lookup': ['FAIL', '7633'], - 'casenorm/sensitive_none_delete': ['FAIL', '7633'], 'casenorm/sensitive_formd_lookup': ['FAIL', '7633'], 'casenorm/sensitive_formd_delete': ['FAIL', '7633'], - 'casenorm/insensitive_none_lookup': ['FAIL', '7633'], - 'casenorm/insensitive_none_delete': ['FAIL', '7633'], - 'casenorm/insensitive_formd_lookup': ['FAIL', '7633'], - 'casenorm/insensitive_formd_delete': ['FAIL', '7633'], - 'casenorm/mixed_none_lookup': ['FAIL', '7633'], 'casenorm/mixed_none_lookup_ci': ['FAIL', '7633'], - 'casenorm/mixed_none_delete': ['FAIL', '7633'], 'casenorm/mixed_formd_lookup': ['FAIL', '7633'], 'casenorm/mixed_formd_lookup_ci': ['FAIL', '7633'], 'casenorm/mixed_formd_delete': ['FAIL', '7633'], diff --git a/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg b/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg index 9e8e456863b..5d2efbf000b 100644 --- a/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg +++ b/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg @@ -17,12 +17,16 @@ # Copyright (c) 2016 by Delphix. All rights reserved. 
# -NAME_C_ORIG=$(echo 'F\0303\0257L\0303\0253N\0303\0204m\0303\0253') -NAME_C_UPPER=$(echo 'F\0303\0217L\0303\0213N\0303\0204M\0303\0213') -NAME_C_LOWER=$(echo 'f\0303\0257l\0303\0253n\0303\0244m\0303\0253') -NAME_D_ORIG=$(echo 'Fi\0314\0210Le\0314\0210NA\0314\0210me\0314\0210') -NAME_D_UPPER=$(echo 'FI\0314\0210LE\0314\0210NA\0314\0210ME\0314\0210') -NAME_D_LOWER=$(echo 'fi\0314\0210le\0314\0210na\0314\0210me\0314\0210') +# Ksh on linux may have locale env variables undefined +export LANG="C.UTF-8" +export LC_ALL="C.UTF-8" + +NAME_C_ORIG=$(printf '\u0046\u00ef\u004c\u00eb\u004e\u00c4\u006d\u00eb') +NAME_C_UPPER=$(printf '\u0046\u00cf\u004c\u00cb\u004e\u00c4\u004d\u00cb') +NAME_C_LOWER=$(printf '\u0066\u00ef\u006c\u00eb\u006e\u00e4\u006d\u00eb') +NAME_D_ORIG=$(printf '\u0046\u0069\u0308\u004c\u0065\u0308\u004e\u0041\u0308\u006d\u0065\u0308') +NAME_D_UPPER=$(printf '\u0046\u0049\u0308\u004c\u0045\u0308\u004e\u0041\u0308\u004d\u0045\u0308') +NAME_D_LOWER=$(printf '\u0066\u0069\u0308\u006c\u0065\u0308\u006e\u0061\u0308\u006d\u0065\u0308') NAMES_ORIG="$NAME_C_ORIG $NAME_D_ORIG" NAMES_UPPER="$NAME_C_UPPER $NAME_D_UPPER" NAMES_LOWER="$NAME_C_LOWER $NAME_D_LOWER" diff --git a/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib b/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib index 273522406b6..5b080165b9f 100644 --- a/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib +++ b/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib @@ -65,14 +65,22 @@ function lookup_file { typeset name=$1 - zlook -l $TESTDIR $name >/dev/null 2>&1 + if is_linux; then + test -f "${TESTDIR}/${name}" >/dev/null 2>&1 + else + zlook -l $TESTDIR $name >/dev/null 2>&1 + fi } function lookup_file_ci { typeset name=$1 - zlook -il $TESTDIR $name >/dev/null 2>&1 + if is_linux; then + test -f "${TESTDIR}/${name}" >/dev/null 2>&1 + else + zlook -il $TESTDIR $name >/dev/null 2>&1 + fi } function lookup_any From 6fed191975ede05267b31e7fd51f282d5ecaed8d Mon Sep 17 00:00:00 2001 
From: Brian Behlendorf Date: Wed, 20 Nov 2019 17:26:32 -0800 Subject: [PATCH 270/325] ZTS: tst.terminate_by_signal increase test threshold The tst.terminate_by_signal test case may occasionally fail when running in a less consistent virtual environment. For all observed failures the process was terminated correctly but it took longer than expected resulting in too many snapshots being created. To minimize the likelihood of this occurring increase the threshold from 50 to 90 snapshots. The larger limit will still verify that the channel program was correctly terminated early. Reviewed-by: Don Brady Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9601 --- .../channel_program/synctask_core/tst.terminate_by_signal.ksh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh index 74889eba805..0a5fb804ac3 100755 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh @@ -91,7 +91,7 @@ log_note "$snap_count snapshots created by ZCP" if [ "$snap_count" -eq 0 ]; then log_fail "Channel program failed to run." -elif [ "$snap_count" -gt 50 ]; then +elif [ "$snap_count" -gt 90 ]; then log_fail "Too many snapshots after a cancel ($snap_count)." else log_pass "Canceling a long-running channel program works." 
From bc21c56c2d8222fa961d04b17963f3a97af103c1 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 21 Nov 2019 17:24:03 -0300 Subject: [PATCH 271/325] Check for unlinked znodes after igrab() The changes in commit 41e1aa2a / PR #9583 introduced a regression on tmpfile_001_pos: fsetxattr() on a O_TMPFILE file descriptor started to fail with errno ENODATA: openat(AT_FDCWD, "/test", O_RDWR|O_TMPFILE, 0666) = 3 <...> fsetxattr(3, "user.test", <...>, 64, 0) = -1 ENODATA The originally proposed change on PR #9583 is not susceptible to it, so just move the code/if-checks around back in that way, to fix it. Reviewed-by: Pavel Snajdr Reviewed-by: Brian Behlendorf Original-patch-by: Heitor Alves de Siqueira Signed-off-by: Mauricio Faria de Oliveira Closes #9602 --- module/zfs/zfs_znode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index fa6072ade1f..59b7cd3c81d 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1094,7 +1094,8 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) ASSERT3U(zp->z_id, ==, obj_num); /* * If zp->z_unlinked is set, the znode is already marked - * for deletion and should not be discovered. + * for deletion and should not be discovered. Check this + * after checking igrab() due to fsetxattr() & O_TMPFILE. * * If igrab() returns NULL the VFS has independently * determined the inode should be evicted and has @@ -1109,10 +1110,11 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * need to detect the active SA hold thereby informing * the VFS that this inode should not be evicted. 
*/ - if (zp->z_unlinked) { - err = SET_ERROR(ENOENT); - } else if (igrab(ZTOI(zp)) == NULL) { - err = SET_ERROR(EAGAIN); + if (igrab(ZTOI(zp)) == NULL) { + if (zp->z_unlinked) + err = SET_ERROR(ENOENT); + else + err = SET_ERROR(EAGAIN); } else { *zpp = zp; err = 0; From c9ac5ec178c2ed8ed73b5e47d730c8e659d36f61 Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Wed, 27 Nov 2019 13:08:18 -0500 Subject: [PATCH 272/325] Add display of checksums to zdb -R The function zdb_read_block (zdb -R) was always intended to have a :c flag which would read the DVA and length supplied by the user, and display the checksum. Since we don't know which checksum goes with the data, we should calculate and display them all. For each checksum in the table, read in the data at the supplied DVA:length, calculate the checksum, and display it. Update the man page and create a zfs test for the new feature. Reviewed-by: Brian Behlendorf Reviewed-by: Kjeld Schouten Signed-off-by: Paul Zuchowski Closes #9607 --- cmd/zdb/zdb.c | 60 ++++++++++++++++- man/man8/zdb.8 | 2 + tests/runfiles/linux.run | 2 +- .../tests/functional/cli_root/zdb/Makefile.am | 3 +- .../functional/cli_root/zdb/zdb_checksum.ksh | 64 +++++++++++++++++++ 5 files changed, 127 insertions(+), 4 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 9744849083a..68122390e05 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -26,6 +26,7 @@ * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. * Copyright (c) 2015, 2017, Intel Corporation. + * Copyright (c) 2019 Datto Inc. 
*/ #include @@ -5591,7 +5592,7 @@ zdb_vdev_lookup(vdev_t *vdev, const char *path) * size - Amount of data to read, in hex, in bytes * flags - A string of characters specifying options * b: Decode a blkptr at given offset within block - * *c: Calculate and display checksums + * c: Calculate and display checksums * d: Decompress data before dumping * e: Byteswap data before dumping * g: Display data as a gang block header @@ -5599,7 +5600,6 @@ zdb_vdev_lookup(vdev_t *vdev, const char *path) * p: Do I/O to physical offset * r: Dump raw data to stdout * - * * = not yet implemented */ static void zdb_read_block(char *thing, spa_t *spa) @@ -5817,6 +5817,62 @@ zdb_read_block(char *thing, spa_t *spa) else zdb_dump_block(thing, buf, size, flags); + /* + * If :c was specified, iterate through the checksum table to + * calculate and display each checksum for our specified + * DVA and length. + */ + if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) && + !(flags & ZDB_FLAG_GBH)) { + zio_t *czio, *cio; + (void) printf("\n"); + for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL; + ck < ZIO_CHECKSUM_FUNCTIONS; ck++) { + + if ((zio_checksum_table[ck].ci_flags & + ZCHECKSUM_FLAG_EMBEDDED) || + ck == ZIO_CHECKSUM_NOPARITY) { + continue; + } + BP_SET_CHECKSUM(bp, ck); + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + czio->io_bp = bp; + + if (vd == vd->vdev_top) { + cio = zio_read(czio, spa, bp, pabd, psize, + NULL, NULL, + ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | + ZIO_FLAG_DONT_RETRY, NULL); + zio_nowait(cio); + } else { + zio_nowait(zio_vdev_child_io(czio, bp, vd, + offset, pabd, psize, ZIO_TYPE_READ, + ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_DONT_CACHE | + ZIO_FLAG_DONT_PROPAGATE | + ZIO_FLAG_DONT_RETRY | + ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | + ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_OPTIONAL, NULL, NULL)); + } + error = zio_wait(czio); + if (error == 0 || error == ECKSUM) { + zio_checksum_compute(czio, ck, 
pabd, lsize); + printf("%12s\tcksum=%llx:%llx:%llx:%llx\n", + zio_checksum_table[ck].ci_name, + (u_longlong_t)bp->blk_cksum.zc_word[0], + (u_longlong_t)bp->blk_cksum.zc_word[1], + (u_longlong_t)bp->blk_cksum.zc_word[2], + (u_longlong_t)bp->blk_cksum.zc_word[3]); + } else { + printf("error %d reading block\n", error); + } + spa_config_exit(spa, SCL_STATE, FTAG); + } + } + if (borrowed) abd_return_buf_copy(pabd, buf, size); diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index c28cf12baee..4f74c4b26ac 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -248,6 +248,8 @@ and, optionally, .Bl -tag -compact -width "b offset" .It Sy b Ar offset Print block pointer +.It Sy c +Calculate and display checksums .It Sy d Decompress the block. Set environment variable .Nm ZDB_NO_ZLE diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8169ad57ffa..916631e41a9 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -106,7 +106,7 @@ tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_001_neg', 'zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', - 'zdb_005_pos', 'zdb_006_pos'] + 'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum'] pre = post = tags = ['functional', 'cli_root', 'zdb'] diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am index d37bcf607f4..0c4de2b2558 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am @@ -5,4 +5,5 @@ dist_pkgdata_SCRIPTS = \ zdb_003_pos.ksh \ zdb_004_pos.ksh \ zdb_005_pos.ksh \ - zdb_006_pos.ksh + zdb_006_pos.ksh \ + zdb_checksum.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh new file mode 100755 index 00000000000..9bc3603d46a --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh + +# +# 
This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# zdb -c will display the same checksum as -ddddddbbbbbb +# +# Strategy: +# 1. Create a pool +# 2. Write some data to a file +# 3. Run zdb -ddddddbbbbbb against the file +# 4. Record the checksum and DVA of L0 block 0 +# 5. Run zdb -R with :c flag and match the checksum + + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Verify zdb -R generates the correct checksum." +log_onexit cleanup +init_data=$TESTDIR/file1 +write_count=8 +blksize=131072 +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS +file_write -o create -w -f $init_data -b $blksize -c $write_count + +# get object number of file +listing=$(ls -i $init_data) +set -A array $listing +obj=${array[0]} +log_note "file $init_data has object number $obj" + +output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \ + |grep -m 1 "L0 DVA" |head -n1) +dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output") +log_note "block 0 of $init_data has a DVA of $dva" +cksum_expected=$(grep -oP '(?<=cksum=)[ A-Za-z0-9:]*' <<< "$output") +log_note "expecting cksum $cksum_expected" +output=$(zdb -R $TESTPOOL $dva:c 2> /dev/null) +result=$(grep $cksum_expected <<< "$output") +(( $? 
!= 0 )) && log_fail "zdb -R failed to print the correct checksum" + +log_pass "zdb -R generates the correct checksum" From 36fe63042cf4a140ec88a58e77150e40a5d2b7c8 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 27 Nov 2019 10:35:49 -0800 Subject: [PATCH 273/325] Remove zfs_vdev_elevator module option As described in commit f81d5ef6 the zfs_vdev_elevator module option is being removed. Users who require this functionality should update their systems to set the disk scheduler using a udev rule. Reviewed-by: Richard Laager Reviewed-by: loli10K Signed-off-by: Brian Behlendorf Issue #8664 Closes #9417 Closes #9609 --- config/kernel-elevator-change.m4 | 26 ------- config/kernel.m4 | 2 - include/linux/blkdev_compat.h | 8 -- man/man5/zfs-module-parameters.5 | 14 ---- module/zfs/vdev_disk.c | 130 +++++-------------------------- 5 files changed, 19 insertions(+), 161 deletions(-) delete mode 100644 config/kernel-elevator-change.m4 diff --git a/config/kernel-elevator-change.m4 b/config/kernel-elevator-change.m4 deleted file mode 100644 index 3aa7320406d..00000000000 --- a/config/kernel-elevator-change.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 2.6.36 API, exported elevator_change() symbol -dnl # 4.12 API, removed elevator_change() symbol -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_ELEVATOR_CHANGE], [ - ZFS_LINUX_TEST_SRC([elevator_change], [ - #include - #include - ],[ - struct request_queue *q = NULL; - char *elevator = NULL; - int error __attribute__ ((unused)) = - elevator_change(q, elevator); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [ - AC_MSG_CHECKING([whether elevator_change() is available]) - ZFS_LINUX_TEST_RESULT([elevator_change], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ELEVATOR_CHANGE, 1, - [elevator_change() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index b22a00cdd13..dce619729d4 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -115,7 +115,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ 
ZFS_AC_KERNEL_SRC_FST_MOUNT ZFS_AC_KERNEL_SRC_BDI ZFS_AC_KERNEL_SRC_SET_NLINK - ZFS_AC_KERNEL_SRC_ELEVATOR_CHANGE ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE ZFS_AC_KERNEL_SRC_VFS_GETATTR @@ -232,7 +231,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_FST_MOUNT ZFS_AC_KERNEL_BDI ZFS_AC_KERNEL_SET_NLINK - ZFS_AC_KERNEL_ELEVATOR_CHANGE ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_LSEEK_EXECUTE ZFS_AC_KERNEL_VFS_GETATTR diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 084ea61ccc9..ace461dc98f 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -638,14 +638,6 @@ blk_queue_discard_secure(struct request_queue *q) #endif } -/* - * Default Linux IO Scheduler, - * Setting the scheduler to noop will allow the Linux IO scheduler to - * still perform front and back merging, while leaving the request - * ordering and prioritization to the ZFS IO scheduler. - */ -#define VDEV_SCHEDULER "noop" - /* * A common holder for vdev_bdev_open() is used to relax the exclusive open * semantics slightly. Internal vdev disk callers may pass VDEV_HOLDER to diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 536eb1466bd..8d30e949f57 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2798,20 +2798,6 @@ threshold. Default value: \fB32,768\fR. .RE -.sp -.ne 2 -.na -\fBzfs_vdev_scheduler\fR (charp) -.ad -.RS 12n -Set the Linux I/O scheduler on whole disk vdevs to this scheduler. This -option has been deprecated and will be removed in a future release. The -standard \fB/sys/block//queue/scheduler\fR interface should be used -to set a block device scheduler. -.sp -Default value: \fBnoop\fR. 
-.RE - .sp .ne 2 .na diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 5786169bc2f..ed79ede7ce1 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -38,7 +38,6 @@ #include #include -char *zfs_vdev_scheduler = VDEV_SCHEDULER; static void *zfs_vdev_holder = VDEV_HOLDER; /* size of the "reserved" partition, in blocks */ @@ -160,75 +159,6 @@ vdev_disk_error(zio_t *zio) zio->io_flags); } -/* - * Use the Linux 'noop' elevator for zfs managed block devices. This - * strikes the ideal balance by allowing the zfs elevator to do all - * request ordering and prioritization. While allowing the Linux - * elevator to do the maximum front/back merging allowed by the - * physical device. This yields the largest possible requests for - * the device with the lowest total overhead. - */ -static void -vdev_elevator_switch(vdev_t *v, char *elevator) -{ - vdev_disk_t *vd = v->vdev_tsd; - struct request_queue *q; - char *device; - int error; - - for (int c = 0; c < v->vdev_children; c++) - vdev_elevator_switch(v->vdev_child[c], elevator); - - if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL) - return; - - q = bdev_get_queue(vd->vd_bdev); - device = vd->vd_bdev->bd_disk->disk_name; - - /* - * Skip devices which are not whole disks (partitions). - * Device-mapper devices are excepted since they may be whole - * disks despite the vdev_wholedisk flag, in which case we can - * and should switch the elevator. If the device-mapper device - * does not have an elevator (i.e. dm-raid, dm-crypt, etc.) the - * "Skip devices without schedulers" check below will fail. - */ - if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0) - return; - - /* Leave existing scheduler when set to "none" */ - if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4)) - return; - - /* - * The elevator_change() function was available in kernels from - * 2.6.36 to 4.11. 
When not available fall back to using the user - * mode helper functionality to set the elevator via sysfs. This - * requires /bin/echo and sysfs to be mounted which may not be true - * early in the boot process. - */ -#ifdef HAVE_ELEVATOR_CHANGE - error = elevator_change(q, elevator); -#else -#define SET_SCHEDULER_CMD \ - "exec 0/sys/block/%s/queue/scheduler " \ - " 2>/dev/null; " \ - "echo %s" - - char *argv[] = { "/bin/sh", "-c", NULL, NULL }; - char *envp[] = { NULL }; - - argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); - error = call_usermodehelper(argv[0], argv, envp, UMH_NO_WAIT); - strfree(argv[2]); -#endif /* HAVE_ELEVATOR_CHANGE */ - if (error) { - zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d", - elevator, v->vdev_path, device, error); - } -} - static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) @@ -360,9 +290,6 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, /* Based on the minimum sector size set the block size */ *ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; - /* Try to set the io scheduler elevator algorithm */ - (void) vdev_elevator_switch(v, zfs_vdev_scheduler); - return (0); } @@ -903,44 +830,6 @@ vdev_disk_rele(vdev_t *vd) /* XXX: Implement me as a vnode rele for the device */ } -static int -param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) -{ - spa_t *spa = NULL; - char *p; - - if (val == NULL) - return (SET_ERROR(-EINVAL)); - - if ((p = strchr(val, '\n')) != NULL) - *p = '\0'; - - if (spa_mode_global != 0) { - mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) { - if (spa_state(spa) != POOL_STATE_ACTIVE || - !spa_writeable(spa) || spa_suspended(spa)) - continue; - - spa_open_ref(spa, FTAG); - mutex_exit(&spa_namespace_lock); - vdev_elevator_switch(spa->spa_root_vdev, (char *)val); - mutex_enter(&spa_namespace_lock); - spa_close(spa, FTAG); - } - mutex_exit(&spa_namespace_lock); - } - - - int error = 
param_set_charp(val, kp); - if (error == 0) { - printk(KERN_INFO "The 'zfs_vdev_scheduler' module option " - "will be removed in a future release.\n"); - } - - return (error); -} - vdev_ops_t vdev_disk_ops = { .vdev_op_open = vdev_disk_open, .vdev_op_close = vdev_disk_close, @@ -957,6 +846,25 @@ vdev_ops_t vdev_disk_ops = { .vdev_op_leaf = B_TRUE /* leaf vdev */ }; +/* + * The zfs_vdev_scheduler module option has been deprecated. Setting this + * value no longer has any effect. It has not yet been entirely removed + * to allow the module to be loaded if this option is specified in the + * /etc/modprobe.d/zfs.conf file. The following warning will be logged. + */ +static int +param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) +{ + int error = param_set_charp(val, kp); + if (error == 0) { + printk(KERN_INFO "The 'zfs_vdev_scheduler' module option " + "is not supported.\n"); + } + + return (error); +} + +char *zfs_vdev_scheduler = "unused"; module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler, param_get_charp, &zfs_vdev_scheduler, 0644); MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler"); From 48be45cd2dce9ef153e5bab2492dba00fa3ab0f1 Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Wed, 27 Nov 2019 13:45:56 -0500 Subject: [PATCH 274/325] Implement -A (ignore ASSERTs) for zdb The command line switch -A (ignore ASSERTs) has always been available in zdb but was never connected up to the correct global variable. There are times when you need zdb to ignore asserts and keep dumping out whatever information it can get despite the ASSERT(s) failing. It was always intended to be part of zdb but was incomplete. 
Reviewed-by: Brian Behlendorf Signed-off-by: Paul Zuchowski Closes #9610 --- lib/libspl/include/assert.h | 10 ++++++++++ lib/libzpool/kernel.c | 1 - 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index f615fbdfe7c..b7b406850f7 100644 --- a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -33,11 +33,18 @@ #include #include +#ifndef _KERNEL +int aok; +#endif + static inline int libspl_assert(const char *buf, const char *file, const char *func, int line) { fprintf(stderr, "%s\n", buf); fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func); + if (aok) { + return (0); + } abort(); } @@ -52,6 +59,9 @@ libspl_assertf(const char *file, const char *func, int line, char *format, ...) fprintf(stderr, "\n"); fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func); va_end(args); + if (aok) { + return; + } abort(); } diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index da172449c73..5d80f9e78cd 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -47,7 +47,6 @@ * Emulation of kernel services in userland. 
*/ -int aok; uint64_t physmem; vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; From 922244cc23dd6cdd2a17b5e0317d69c8317ebf62 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Fri, 22 Nov 2019 18:48:03 +0100 Subject: [PATCH 275/325] Fix small typo in systemd mount generator Reviewed-by: Antonio Russo Reviewed-by: Richard Laager Signed-off-by: InsanePrawn Closes #9611 --- etc/systemd/system-generators/zfs-mount-generator.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index be2c1420cd7..4f9443a91c9 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -78,7 +78,7 @@ process_line() { if [ "${p_canmount}" = "off" ] ; then return elif [ "${p_canmount}" = "noauto" ] ; then - # Don't let a noauto marked mountpoint block an "auto" market mountpoint + # Don't let a noauto marked mountpoint block an "auto" marked mountpoint return elif [ "${p_canmount}" = "on" ] ; then : # This is OK From 19ea83c59483baef04866065e289d50915ae5508 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 23 Nov 2019 16:16:06 +0100 Subject: [PATCH 276/325] Fix non-absolute path in systemd mount generator Systemd will ignore units that try to execute programs from non-absolute paths. Use hardcoded /bin/sh instead. 
Reviewed-by: Antonio Russo Reviewed-by: Richard Laager Signed-off-by: InsanePrawn Closes #9611 --- etc/systemd/system-generators/zfs-mount-generator.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 4f9443a91c9..2eba7471606 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -181,7 +181,7 @@ process_line() { pathdep="RequiresMountsFor='${p_keyloc#file://}'" keyloadcmd="@sbindir@/zfs load-key '${dataset}'" elif [ "${p_keyloc}" = "prompt" ] ; then - keyloadcmd="sh -c 'set -eu;"\ + keyloadcmd="/bin/sh -c 'set -eu;"\ "keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";"\ "[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;"\ "count=0;"\ From e74055920e65d967fee3c62143acf86d8f556196 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Fri, 22 Nov 2019 18:53:51 +0100 Subject: [PATCH 277/325] Fix encryption logic in systemd mount generator Previously the generator would skip a dataset if it wasn't mountable by 'zfs mount -a' (legacy/none mountpoint, canmount off/noauto). This also skipped the generation of key-load units for such datasets, breaking the dependency handling for mountable child datasets. 
Reviewed-by: Antonio Russo Reviewed-by: Richard Laager Signed-off-by: InsanePrawn Closes #9611 --- .../system-generators/zfs-mount-generator.in | 107 ++++++++++-------- 1 file changed, 57 insertions(+), 50 deletions(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 2eba7471606..850396fb6c2 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -74,6 +74,62 @@ process_line() { p_encroot="${11}" p_keyloc="${12}" + # Minimal pre-requisites to mount a ZFS dataset + wants="zfs-import.target" + + # Handle encryption + if [ -n "${p_encroot}" ] && + [ "${p_encroot}" != "-" ] ; then + keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service" + if [ "${p_encroot}" = "${dataset}" ] ; then + pathdep="" + if [ "${p_keyloc%%://*}" = "file" ] ; then + pathdep="RequiresMountsFor='${p_keyloc#file://}'" + keyloadcmd="@sbindir@/zfs load-key '${dataset}'" + elif [ "${p_keyloc}" = "prompt" ] ; then + keyloadcmd="/bin/sh -c 'set -eu;"\ +"keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";"\ +"[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;"\ +"count=0;"\ +"while [ \$\$count -lt 3 ];do"\ +" systemd-ask-password --id=\"zfs:${dataset}\""\ +" \"Enter passphrase for ${dataset}:\"|"\ +" @sbindir@/zfs load-key \"${dataset}\" && exit 0;"\ +" count=\$\$((count + 1));"\ +"done;"\ +"exit 1'" + else + printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ + "${dataset}" >/dev/kmsg + fi + + # Generate the key-load .service unit + cat > "${dest_norm}/${keyloadunit}" << EOF +# Automatically generated by zfs-mount-generator + +[Unit] +Description=Load ZFS key for ${dataset} +SourcePath=${cachefile} +Documentation=man:zfs-mount-generator(8) +DefaultDependencies=no +Wants=${wants} +After=${wants} +${pathdep} + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=${keyloadcmd} +ExecStop=@sbindir@/zfs unload-key 
'${dataset}' +EOF + fi + # Update the dependencies for the mount file to require the + # key-loading unit. + wants="${wants} ${keyloadunit}" + fi + + # Prepare the .mount unit + # Check for canmount=off . if [ "${p_canmount}" = "off" ] ; then return @@ -170,56 +226,6 @@ process_line() { "${dataset}" >/dev/kmsg fi - # Minimal pre-requisites to mount a ZFS dataset - wants="zfs-import.target" - if [ -n "${p_encroot}" ] && - [ "${p_encroot}" != "-" ] ; then - keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service" - if [ "${p_encroot}" = "${dataset}" ] ; then - pathdep="" - if [ "${p_keyloc%%://*}" = "file" ] ; then - pathdep="RequiresMountsFor='${p_keyloc#file://}'" - keyloadcmd="@sbindir@/zfs load-key '${dataset}'" - elif [ "${p_keyloc}" = "prompt" ] ; then - keyloadcmd="/bin/sh -c 'set -eu;"\ -"keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";"\ -"[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;"\ -"count=0;"\ -"while [ \$\$count -lt 3 ];do"\ -" systemd-ask-password --id=\"zfs:${dataset}\""\ -" \"Enter passphrase for ${dataset}:\"|"\ -" @sbindir@/zfs load-key \"${dataset}\" && exit 0;"\ -" count=\$\$((count + 1));"\ -"done;"\ -"exit 1'" - else - printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ - "${dataset}" >/dev/kmsg - fi - cat > "${dest_norm}/${keyloadunit}" << EOF -# Automatically generated by zfs-mount-generator - -[Unit] -Description=Load ZFS key for ${dataset} -SourcePath=${cachefile} -Documentation=man:zfs-mount-generator(8) -DefaultDependencies=no -Wants=${wants} -After=${wants} -${pathdep} - -[Service] -Type=oneshot -RemainAfterExit=yes -ExecStart=${keyloadcmd} -ExecStop=@sbindir@/zfs unload-key '${dataset}' -EOF - fi - # Update the dependencies for the mount file to require the - # key-loading unit. - wants="${wants} ${keyloadunit}" - fi - # If the mountpoint has already been created, give it precedence. 
if [ -e "${dest_norm}/${mountfile}" ] ; then printf 'zfs-mount-generator: %s already exists\n' "${mountfile}" \ @@ -227,6 +233,7 @@ EOF return fi + # Create the .mount unit file. # By ordering before zfs-mount.service, we avoid race conditions. cat > "${dest_norm}/${mountfile}" << EOF # Automatically generated by zfs-mount-generator From 85204e30dd10f244770b9df511c3fd7b1893d29b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Mon, 2 Dec 2019 22:23:47 +0100 Subject: [PATCH 278/325] Adapt gitignore for modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the specific gitignore rules for module left-overs and add a generic one in modules/. Reviewed-by: Ryan Moeller Reviewed-by: Brian Behlendorf Reviewed-by: Kjeld Schouten Signed-off-by: Michael Niewöhner Closes #9656 --- .gitignore | 12 ------------ module/.gitignore | 1 + 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 19377a7b126..57867bfc6ea 100644 --- a/.gitignore +++ b/.gitignore @@ -63,15 +63,3 @@ cscope.* *.orig *.log venv - -# -# Module leftovers -# -/module/avl/zavl.mod -/module/icp/icp.mod -/module/lua/zlua.mod -/module/nvpair/znvpair.mod -/module/spl/spl.mod -/module/unicode/zunicode.mod -/module/zcommon/zcommon.mod -/module/zfs/zfs.mod diff --git a/module/.gitignore b/module/.gitignore index 1ea8ef0bb81..45e5f992223 100644 --- a/module/.gitignore +++ b/module/.gitignore @@ -5,6 +5,7 @@ *.dwo .*.cmd .*.d +*.mod /.cache.mk /.tmp_versions From 9cf46ddedc0da8a2f773f9919938f32c33edba70 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 3 Dec 2019 09:58:03 -0800 Subject: [PATCH 279/325] Increase allowed 'special_small_blocks' maximum value There may be circumstances where it's desirable that all blocks in a specified dataset be stored on the special device. Relax the artificial 128K limit and allow the special_small_blocks property to be set up to 1M. 
When blocks >1MB have been enabled via the zfs_max_recordsize module option, this limit is increased accordingly. Reviewed-by: Don Brady Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9131 Closes #9355 --- lib/libzfs/libzfs_dataset.c | 17 +++++++++++++---- man/man8/zfs.8 | 2 +- module/zcommon/zfs_prop.c | 2 +- .../alloc_class/alloc_class_011_neg.ksh | 3 ++- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 7f33e244797..4a07c8d20bf 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1230,12 +1230,19 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } case ZFS_PROP_SPECIAL_SMALL_BLOCKS: + { + int maxbs = SPA_OLD_MAXBLOCKSIZE; + char buf[64]; + if (zpool_hdl != NULL) { char state[64] = ""; + maxbs = zpool_get_prop_int(zpool_hdl, + ZPOOL_PROP_MAXBLOCKSIZE, NULL); + /* * Issue a warning but do not fail so that - * tests for setable properties succeed. + * tests for settable properties succeed. */ if (zpool_prop_get_feature(zpool_hdl, "feature@allocation_classes", state, @@ -1248,15 +1255,17 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } if (intval != 0 && (intval < SPA_MINBLOCKSIZE || - intval > SPA_OLD_MAXBLOCKSIZE || !ISP2(intval))) { + intval > maxbs || !ISP2(intval))) { + zfs_nicebytes(maxbs, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid '%s=%d' property: must be zero or " - "a power of 2 from 512B to 128K"), propname, - intval); + "a power of 2 from 512B to %s"), propname, + intval, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; + } case ZFS_PROP_MLSLABEL: { diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 1572ac98265..496363642b9 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -1545,7 +1545,7 @@ This value represents the threshold block size for including small file blocks into the special allocation class. 
Blocks smaller than or equal to this value will be assigned to the special allocation class while greater blocks will be assigned to the regular class. Valid values are zero or a power of two -from 512B up to 128K. The default size is 0 which means no small file blocks +from 512B up to 1M. The default size is 0 which means no small file blocks will be allocated in the special class. .Pp Before setting this property, a special class vdev must be added to the diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index f1c4158388f..cddf3e88db2 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -540,7 +540,7 @@ zfs_prop_init(void) ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE"); zprop_register_number(ZFS_PROP_SPECIAL_SMALL_BLOCKS, "special_small_blocks", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "zero or 512 to 128K, power of 2", "SPECIAL_SMALL_BLOCKS"); + "zero or 512 to 1M, power of 2", "SPECIAL_SMALL_BLOCKS"); /* hidden properties */ zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh index fe1ae366a6d..d804e5371eb 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh @@ -21,6 +21,7 @@ # # DESCRIPTION: # Setting the special_small_blocks property to invalid values fails. +# Powers of two from 512 to 1M are allowed. 
# verify_runnable "global" @@ -34,7 +35,7 @@ log_must disk_setup log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ $CLASS_DISK0 $CLASS_DISK1 -for value in 256 1025 262144 +for value in 256 1025 2097152 do log_mustnot zfs set special_small_blocks=$value $TESTPOOL done From 388ef045b27682de15c6afc91ed86333d405f09f Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 3 Dec 2019 12:59:30 -0500 Subject: [PATCH 280/325] Fix use-after-free in case of L2ARC prefetch failure In case L2ARC read failed, l2arc_read_done() creates _different_ ZIO to read data from the original storage device. Unfortunately pointer to the failed ZIO remains in hdr->b_l1hdr.b_acb->acb_zio_head, and if some other read try to bump the ZIO priority, it will crash. The problem is reproducible by corrupting L2ARC content and reading some data with prefetch if l2arc_noprefetch tunable is changed to 0. With the default setting the issue is probably not reproducible now. Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. Closes #9648 --- module/zfs/arc.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index e16b44ca82e..03097cd83dc 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -8561,7 +8561,6 @@ l2arc_read_done(zio_t *zio) zio->io_private = hdr; arc_read_done(zio); } else { - mutex_exit(hash_lock); /* * Buffer didn't survive caching. Increment stats and * reissue to the original storage device. 
@@ -8586,10 +8585,24 @@ l2arc_read_done(zio_t *zio) ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); - zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, + zio = zio_read(pio, zio->io_spa, zio->io_bp, abd, zio->io_size, arc_read_done, hdr, zio->io_priority, cb->l2rcb_flags, - &cb->l2rcb_zb)); + &cb->l2rcb_zb); + + /* + * Original ZIO will be freed, so we need to update + * ARC header with the new ZIO pointer to be used + * by zio_change_priority() in arc_read(). + */ + for (struct arc_callback *acb = hdr->b_l1hdr.b_acb; + acb != NULL; acb = acb->acb_next) + acb->acb_zio_head = zio; + + mutex_exit(hash_lock); + zio_nowait(zio); + } else { + mutex_exit(hash_lock); } } From 73b5231187becf6f406486ed6cbb8a3c8481e76c Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Tue, 3 Dec 2019 17:37:15 -0500 Subject: [PATCH 281/325] Fix zdb_read_block using zio after it is destroyed The checksum display code of zdb_read_block uses a zio to read in the block and then calls zio_checksum_compute. Use a new zio in the call to zio_checksum_compute not the zio from the read which has been destroyed by zio_wait. 
Reviewed-by: Brian Behlendorf Reviewed-by: Igor Kozhukhov Signed-off-by: Paul Zuchowski Closes #9644 Closes #9657 --- cmd/zdb/zdb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 68122390e05..05891f84a18 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -5859,13 +5859,18 @@ zdb_read_block(char *thing, spa_t *spa) } error = zio_wait(czio); if (error == 0 || error == ECKSUM) { - zio_checksum_compute(czio, ck, pabd, lsize); + zio_t *ck_zio = zio_root(spa, NULL, NULL, 0); + ck_zio->io_offset = + DVA_GET_OFFSET(&bp->blk_dva[0]); + ck_zio->io_bp = bp; + zio_checksum_compute(ck_zio, ck, pabd, lsize); printf("%12s\tcksum=%llx:%llx:%llx:%llx\n", zio_checksum_table[ck].ci_name, (u_longlong_t)bp->blk_cksum.zc_word[0], (u_longlong_t)bp->blk_cksum.zc_word[1], (u_longlong_t)bp->blk_cksum.zc_word[2], (u_longlong_t)bp->blk_cksum.zc_word[3]); + zio_wait(ck_zio); } else { printf("error %d reading block\n", error); } From ba8a5a882d8317207a91d096f1574e987dfef428 Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Wed, 4 Dec 2019 16:24:56 -0500 Subject: [PATCH 282/325] Fix reporting of L2ARC hits/misses in arc_summary3 arc_summary3 reports L2ARC hits and misses as Bytes, whereas they should be reported as events. arc_summary2 reports these correctly. 
Reviewed-by: Ryan Moeller Reviewed-by: Kjeld Schouten Signed-off-by: George Amanakis Closes #9669 --- cmd/arc_summary/arc_summary3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 index 7bee77061d5..e9890bf21e6 100755 --- a/cmd/arc_summary/arc_summary3 +++ b/cmd/arc_summary/arc_summary3 @@ -677,10 +677,10 @@ def section_l2arc(kstats_dict): prt_1('L2ARC breakdown:', f_hits(l2_access_total)) prt_i2('Hit ratio:', f_perc(arc_stats['l2_hits'], l2_access_total), - f_bytes(arc_stats['l2_hits'])) + f_hits(arc_stats['l2_hits'])) prt_i2('Miss ratio:', f_perc(arc_stats['l2_misses'], l2_access_total), - f_bytes(arc_stats['l2_misses'])) + f_hits(arc_stats['l2_misses'])) prt_i1('Feeds:', f_hits(arc_stats['l2_feeds'])) print() From 85ff6a23f4f444c527ee0691ebd6f20022027ca7 Mon Sep 17 00:00:00 2001 From: Kjeld Schouten Date: Fri, 6 Dec 2019 18:37:48 +0100 Subject: [PATCH 283/325] Set send_realloc_files.ksh to use properties.shlib This sets send_realloc_files.ksh to use properties.shlib (like the other compression related tests) It was missing from #9645 Reviewed-by: Brian Behlendorf Signed-off-by: Kjeld Schouten-Lebbing Issue #9645 Closes #9679 --- tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh index 27d65439b25..8e9db969bc2 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh @@ -15,6 +15,7 @@ # Copyright (c) 2019 by Lawrence Livermore National Security, LLC. # +. $STF_SUITE/include/properties.shlib . $STF_SUITE/include/libtest.shlib . 
$STF_SUITE/tests/functional/rsend/rsend.kshlib @@ -71,7 +72,7 @@ for i in {1..$passes}; do # Randomly modify several dataset properties in order to generate # more interesting incremental send streams. rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256" - rand_set_prop $POOL/fs compression "off" "lzjb" "gzip" "lz4" + rand_set_prop $POOL/fs compression "${compress_prop_vals[@]}" rand_set_prop $POOL/fs recordsize "32K" "128K" rand_set_prop $POOL/fs dnodesize "legacy" "auto" "4k" rand_set_prop $POOL/fs xattr "on" "sa" From 2525b71c680420a04f35c1388a030f0a36f725f3 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 9 Dec 2019 11:09:14 -0800 Subject: [PATCH 284/325] ZTS: Fix zpool_reopen_001_pos Update the vdev_disk_open() retry logic to use a specified number of milliseconds to be more robust. Additionally, on failure log both the time waited and requested timeout to the internal log. The default maximum allowed open retry time has been increased from 500ms to 1000ms. Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9680 Conflicts: --- module/zfs/vdev_disk.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index ed79ede7ce1..661f0f1b727 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -38,9 +38,21 @@ #include #include +/* + * Unique identifier for the exclusive vdev holder. + */ static void *zfs_vdev_holder = VDEV_HOLDER; -/* size of the "reserved" partition, in blocks */ +/* + * Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the + * device is missing. The missing path may be transient since the links + * can be briefly removed and recreated in response to udev events. + */ +static unsigned zfs_vdev_open_timeout_ms = 1000; + +/* + * Size of the "reserved" partition, in blocks. 
+ */ #define EFI_MIN_RESV_SIZE (16 * 1024) /* @@ -165,8 +177,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, { struct block_device *bdev; fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa)); - int count = 0, block_size; - int bdev_retry_count = 50; + hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms); vdev_disk_t *vd; /* Must have a pathname and it must be absolute. */ @@ -181,7 +192,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * partition force re-scanning the partition table while closed * in order to get an accurate updated block device size. Then * since udev may need to recreate the device links increase the - * open retry count before reporting the device as unavailable. + * open retry timeout before reporting the device as unavailable. */ vd = v->vdev_tsd; if (vd) { @@ -206,8 +217,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (!IS_ERR(bdev)) { int error = vdev_bdev_reread_part(bdev); vdev_bdev_close(bdev, mode); - if (error == 0) - bdev_retry_count = 100; + if (error == 0) { + timeout = MSEC2NSEC( + zfs_vdev_open_timeout_ms * 2); + } } } } else { @@ -240,12 +253,12 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * and it is reasonable to sleep and retry before giving up. In * practice delays have been observed to be on the order of 100ms. 
*/ + hrtime_t start = gethrtime(); bdev = ERR_PTR(-ENXIO); - while (IS_ERR(bdev) && count < bdev_retry_count) { + while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) { bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder); if (unlikely(PTR_ERR(bdev) == -ENOENT)) { schedule_timeout(MSEC_TO_TICK(10)); - count++; } else if (IS_ERR(bdev)) { break; } @@ -253,7 +266,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (IS_ERR(bdev)) { int error = -PTR_ERR(bdev); - vdev_dbgmsg(v, "open error=%d count=%d", error, count); + vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error, + (u_longlong_t)(gethrtime() - start), + (u_longlong_t)timeout); vd->vd_bdev = NULL; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); @@ -267,7 +282,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, struct request_queue *q = bdev_get_queue(vd->vd_bdev); /* Determine the physical block size */ - block_size = vdev_bdev_block_size(vd->vd_bdev); + int block_size = vdev_bdev_block_size(vd->vd_bdev); /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE; From d2233a08fa872f9a3af0c84697c0366bb85eea5e Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Mon, 9 Dec 2019 12:29:56 -0800 Subject: [PATCH 285/325] Exclude data from cores unconditionally and metadata conditionally This change allows us to align the code dump logic across platforms. 
Reviewed-by: Jorgen Lundman Reviewed-by: Brian Behlendorf Reviewed-by: Don Brady Signed-off-by: Matt Macy Closes #9691 --- include/sys/zio.h | 1 + module/zfs/zio.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/sys/zio.h b/include/sys/zio.h index e69bf920803..aa58fe1fafd 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -278,6 +278,7 @@ enum zio_wait_type { typedef void zio_done_func_t(zio_t *zio); +extern int zio_exclude_metadata; extern int zio_dva_throttle_enabled; extern const char *zio_type_name[ZIO_TYPES]; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index aac0392a4ad..1bd9f2e90b0 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -121,6 +121,11 @@ int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ */ #define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) +/* + * Enable smaller cores by excluding metadata + * allocations as well. + */ +int zio_exclude_metadata = 0; int zio_requeue_io_start_cut_in_line = 1; #ifdef ZFS_DEBUG @@ -153,7 +158,11 @@ zio_init(void) size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t p2 = size; size_t align = 0; - size_t cflags = (size > zio_buf_debug_limit) ? KMC_NODEBUG : 0; + size_t data_cflags, cflags; + + data_cflags = KMC_NODEBUG; + cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ? 
+ KMC_NODEBUG : 0; #if defined(_ILP32) && defined(_KERNEL) /* @@ -201,7 +210,7 @@ zio_init(void) (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); zio_data_buf_cache[c] = kmem_cache_create(name, size, align, NULL, NULL, NULL, NULL, - data_alloc_arena, cflags); + data_alloc_arena, data_cflags); } } From 4d658bda326e3f37250ee788a08636b60bbf6328 Mon Sep 17 00:00:00 2001 From: Paul Zuchowski <31706010+PaulZ-98@users.noreply.github.com> Date: Tue, 10 Dec 2019 18:51:58 -0500 Subject: [PATCH 286/325] zio_decompress_data always ASSERTs successful decompression This interferes with zdb_read_block trying all the decompression algorithms when the 'd' flag is specified, as some are expected to fail. Also control the output when guessing algorithms, try the more common compression types first, allow specifying lsize/psize, and fix an uninitialized variable. Reviewed-by: Ryan Moeller Reviewed-by: Brian Behlendorf Signed-off-by: Paul Zuchowski Closes #9612 Closes #9630 --- cmd/zdb/zdb.c | 129 +++++++++++------- man/man8/zdb.8 | 10 +- module/zfs/zio_compress.c | 1 - tests/runfiles/linux.run | 2 +- .../tests/functional/cli_root/zdb/Makefile.am | 3 +- .../cli_root/zdb/zdb_decompress.ksh | 119 ++++++++++++++++ 6 files changed, 205 insertions(+), 59 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 05891f84a18..92bfd3ecfe3 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2409,7 +2409,7 @@ static const char *objset_types[DMU_OST_NUMTYPES] = { static void dump_dir(objset_t *os) { - dmu_objset_stats_t dds; + dmu_objset_stats_t dds = { 0 }; uint64_t object, object_count; uint64_t refdbytes, usedobjs, scratch; char numbuf[32]; @@ -5447,9 +5447,9 @@ dump_zpool(spa_t *spa) #define ZDB_FLAG_BSWAP 0x0004 #define ZDB_FLAG_GBH 0x0008 #define ZDB_FLAG_INDIRECT 0x0010 -#define ZDB_FLAG_PHYS 0x0020 -#define ZDB_FLAG_RAW 0x0040 -#define ZDB_FLAG_PRINT_BLKPTR 0x0080 +#define ZDB_FLAG_RAW 0x0020 
+#define ZDB_FLAG_PRINT_BLKPTR 0x0040 +#define ZDB_FLAG_VERBOSE 0x0080 static int flagbits[256]; @@ -5580,11 +5580,30 @@ zdb_vdev_lookup(vdev_t *vdev, const char *path) return (NULL); } +static boolean_t +zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize) +{ + char *s0, *s1; + + if (sizes == NULL) + return (B_FALSE); + + s0 = strtok(sizes, "/"); + if (s0 == NULL) + return (B_FALSE); + s1 = strtok(NULL, "/"); + *lsize = strtoull(s0, NULL, 16); + *psize = s1 ? strtoull(s1, NULL, 16) : *lsize; + return (*lsize >= *psize && *psize > 0); +} + +#define ZIO_COMPRESS_MASK(alg) (1ULL << (ZIO_COMPRESS_##alg)) + /* * Read a block from a pool and print it out. The syntax of the * block descriptor is: * - * pool:vdev_specifier:offset:size[:flags] + * pool:vdev_specifier:offset:[lsize/]psize[:flags] * * pool - The name of the pool you wish to read from * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) @@ -5597,8 +5616,8 @@ zdb_vdev_lookup(vdev_t *vdev, const char *path) * e: Byteswap data before dumping * g: Display data as a gang block header * i: Display as an indirect block - * p: Do I/O to physical offset * r: Dump raw data to stdout + * v: Verbose * */ static void @@ -5607,13 +5626,12 @@ zdb_read_block(char *thing, spa_t *spa) blkptr_t blk, *bp = &blk; dva_t *dva = bp->blk_dva; int flags = 0; - uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; + uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0; zio_t *zio; vdev_t *vd; abd_t *pabd; void *lbuf, *buf; - const char *s, *vdev; - char *p, *dup, *flagstr; + char *s, *p, *dup, *vdev, *flagstr, *sizes; int i, error; boolean_t borrowed = B_FALSE; @@ -5622,18 +5640,14 @@ zdb_read_block(char *thing, spa_t *spa) vdev = s ? s : ""; s = strtok(NULL, ":"); offset = strtoull(s ? s : "", NULL, 16); + sizes = strtok(NULL, ":"); s = strtok(NULL, ":"); - size = strtoull(s ? 
s : "", NULL, 16); - s = strtok(NULL, ":"); - if (s) - flagstr = strdup(s); - else - flagstr = strdup(""); + flagstr = strdup(s ? s : ""); s = NULL; - if (size == 0) - s = "size must not be zero"; - if (!IS_P2ALIGNED(size, DEV_BSIZE)) + if (!zdb_parse_block_sizes(sizes, &lsize, &psize)) + s = "invalid size(s)"; + if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE)) s = "size must be a multiple of sector size"; if (!IS_P2ALIGNED(offset, DEV_BSIZE)) s = "offset must be a multiple of sector size"; @@ -5689,9 +5703,6 @@ zdb_read_block(char *thing, spa_t *spa) vd->vdev_ops->vdev_op_type); } - psize = size; - lsize = size; - pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); @@ -5748,30 +5759,41 @@ zdb_read_block(char *thing, spa_t *spa) * We don't know how the data was compressed, so just try * every decompress function at every inflated blocksize. */ - enum zio_compress c; void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); + int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 }; + int *cfuncp = cfuncs; + uint64_t maxlsize = SPA_MAXBLOCKSIZE; + uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) | + ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) | + (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0); + *cfuncp++ = ZIO_COMPRESS_LZ4; + *cfuncp++ = ZIO_COMPRESS_LZJB; + mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); + for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) + if (((1ULL << c) & mask) == 0) + *cfuncp++ = c; /* - * XXX - On the one hand, with SPA_MAXBLOCKSIZE at 16MB, - * this could take a while and we should let the user know + * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this + * could take a while and we should let the user know * we are not stuck. On the other hand, printing progress - * info gets old after a while. What to do? + * info gets old after a while. User can specify 'v' flag + * to see the progression. 
*/ - for (lsize = psize + SPA_MINBLOCKSIZE; - lsize <= SPA_MAXBLOCKSIZE; lsize += SPA_MINBLOCKSIZE) { - for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { - /* - * ZLE can easily decompress non zle stream. - * So have an option to disable it. - */ - if (c == ZIO_COMPRESS_ZLE && - getenv("ZDB_NO_ZLE")) - continue; - - (void) fprintf(stderr, - "Trying %05llx -> %05llx (%s)\n", - (u_longlong_t)psize, (u_longlong_t)lsize, - zio_compress_table[c].ci_name); + if (lsize == psize) + lsize += SPA_MINBLOCKSIZE; + else + maxlsize = lsize; + for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) { + for (cfuncp = cfuncs; *cfuncp; cfuncp++) { + if (flags & ZDB_FLAG_VERBOSE) { + (void) fprintf(stderr, + "Trying %05llx -> %05llx (%s)\n", + (u_longlong_t)psize, + (u_longlong_t)lsize, + zio_compress_table[*cfuncp].\ + ci_name); + } /* * We randomize lbuf2, and decompress to both @@ -5780,27 +5802,30 @@ zdb_read_block(char *thing, spa_t *spa) */ VERIFY0(random_get_pseudo_bytes(lbuf2, lsize)); - if (zio_decompress_data(c, pabd, + if (zio_decompress_data(*cfuncp, pabd, lbuf, psize, lsize) == 0 && - zio_decompress_data(c, pabd, + zio_decompress_data(*cfuncp, pabd, lbuf2, psize, lsize) == 0 && bcmp(lbuf, lbuf2, lsize) == 0) break; } - if (c != ZIO_COMPRESS_FUNCTIONS) + if (*cfuncp != 0) break; } umem_free(lbuf2, SPA_MAXBLOCKSIZE); - if (lsize > SPA_MAXBLOCKSIZE) { + if (lsize > maxlsize) { (void) printf("Decompress of %s failed\n", thing); goto out; } buf = lbuf; - size = lsize; + if (*cfuncp == ZIO_COMPRESS_ZLE) { + printf("\nZLE decompression was selected. 
If you " + "suspect the results are wrong,\ntry avoiding ZLE " + "by setting and exporting ZDB_NO_ZLE=\"true\"\n"); + } } else { - size = psize; - buf = abd_borrow_buf_copy(pabd, size); + buf = abd_borrow_buf_copy(pabd, lsize); borrowed = B_TRUE; } @@ -5808,14 +5833,14 @@ zdb_read_block(char *thing, spa_t *spa) zdb_print_blkptr((blkptr_t *)(void *) ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); else if (flags & ZDB_FLAG_RAW) - zdb_dump_block_raw(buf, size, flags); + zdb_dump_block_raw(buf, lsize, flags); else if (flags & ZDB_FLAG_INDIRECT) - zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), + zdb_dump_indirect((blkptr_t *)buf, lsize / sizeof (blkptr_t), flags); else if (flags & ZDB_FLAG_GBH) zdb_dump_gbh(buf, flags); else - zdb_dump_block(thing, buf, size, flags); + zdb_dump_block(thing, buf, lsize, flags); /* * If :c was specified, iterate through the checksum table to @@ -5879,7 +5904,7 @@ zdb_read_block(char *thing, spa_t *spa) } if (borrowed) - abd_return_buf_copy(pabd, buf, size); + abd_return_buf_copy(pabd, buf, lsize); out: abd_free(pabd); @@ -6294,8 +6319,8 @@ main(int argc, char **argv) flagbits['e'] = ZDB_FLAG_BSWAP; flagbits['g'] = ZDB_FLAG_GBH; flagbits['i'] = ZDB_FLAG_INDIRECT; - flagbits['p'] = ZDB_FLAG_PHYS; flagbits['r'] = ZDB_FLAG_RAW; + flagbits['v'] = ZDB_FLAG_VERBOSE; for (int i = 0; i < argc; i++) zdb_read_block(argv[i], spa); diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index 4f74c4b26ac..e907d03fe37 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -63,7 +63,7 @@ .Op Fl A .Op Fl e Oo Fl V Oc Op Fl p Ar path ... .Op Fl U Ar cache -.Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags +.Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [/] Ns Op : Ns Ar flags .Nm .Fl S .Op Fl AP @@ -227,7 +227,7 @@ This option can be combined with .Fl v for increasing verbosity. 
.It Xo -.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags +.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [/] Ns Op : Ns Ar flags .Xc Read and display a block from the specified device. By default the block is displayed as a hex dump, but see the description of the @@ -240,8 +240,8 @@ The block is specified in terms of a colon-separated tuple .Ar offset .Pq the offset within the vdev .Ar size -.Pq the size of the block to read -and, optionally, +.Pq the physical size, or logical size / physical size +of the block to read and, optionally, .Ar flags .Pq a set of flags, described below . .Pp @@ -262,6 +262,8 @@ Dump gang block header Dump indirect block .It Sy r Dump raw uninterpreted block data +.It Sy v +Verbose output for guessing compression algorithm .El .It Fl s Report statistics on diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index cdaade27c67..01c51347fec 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -159,7 +159,6 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, * the checksum. However, for extra protection (e.g. against bitflips * in non-ECC RAM), we handle this error (and test it). 
*/ - ASSERT0(ret); if (zio_decompress_fail_fraction != 0 && spa_get_random(zio_decompress_fail_fraction) == 0) ret = SET_ERROR(EINVAL); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 916631e41a9..6579cc748f9 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -106,7 +106,7 @@ tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_001_neg', 'zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', - 'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum'] + 'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum', 'zdb_decompress'] pre = post = tags = ['functional', 'cli_root', 'zdb'] diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am index 0c4de2b2558..9f143078f18 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am @@ -6,4 +6,5 @@ dist_pkgdata_SCRIPTS = \ zdb_004_pos.ksh \ zdb_005_pos.ksh \ zdb_006_pos.ksh \ - zdb_checksum.ksh + zdb_checksum.ksh \ + zdb_decompress.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh new file mode 100755 index 00000000000..0e468d7c987 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Datto, Inc. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +# +# Description: +# zdb -R pool :d will display the correct data and length +# +# Strategy: +# 1. Create a pool, set compression to lzjb +# 2. Write some identifiable data to a file +# 3. Run zdb -ddddddbbbbbb against the file +# 4. Record the DVA, lsize, and psize of L0 block 0 +# 5. Run zdb -R with :d flag and match the data +# 6. Run zdb -R with :dr flags and match the lsize/psize +# 7. Run zdb -R with :dr flags and match the lsize +# 8. Run zdb -R with :dr flags and match the psize +# + + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Verify zdb -R :d flag (decompress) works as expected" +log_onexit cleanup +init_data=$TESTDIR/file1 +write_count=256 +blksize=4096 +pattern="_match__pattern_" +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS +log_must zfs set recordsize=$blksize $TESTPOOL/$TESTFS +log_must zfs set compression=lzjb $TESTPOOL/$TESTFS + +# 16 chars 256 times = 4k = block size +typeset four_k="" +for i in {1..$write_count} +do + four_k=$four_k$pattern +done + +# write the 4k block 256 times +for i in {1..$write_count} +do + echo $four_k >> $init_data +done + +sync_pool $TESTPOOL true + +# get object number of file +listing=$(ls -i $init_data) +set -A array $listing +obj=${array[0]} +log_note "file $init_data has object number $obj" + +output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \ + |grep -m 1 "L0 DVA" |head -n1) +dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output") +log_note "block 0 of $init_data has a DVA of $dva" + +# use the length reported by zdb -ddddddbbbbbb +size_str=$(grep -oP 'size=\K.*?(?= )' <<< "$output") +log_note "block size $size_str" + +vdev=$(echo "$dva" |awk '{split($0,array,":")} END{print array[1]}') +offset=$(echo "$dva" |awk '{split($0,array,":")} END{print array[2]}') +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:d 2> /dev/null) +echo $output |grep $pattern > /dev/null +(( $? 
!= 0 )) && log_fail "zdb -R :d failed to decompress the data properly" + +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null) +echo $output |grep $four_k > /dev/null +(( $? != 0 )) && log_fail "zdb -R :dr failed to decompress the data properly" + +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null) +result=${#output} +(( $result != $blksize)) && log_fail \ +"zdb -R failed to decompress the data to the length (${#output} != $size_str)" + +# decompress using lsize +lsize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[1]}') +psize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[2]}') +output=$(zdb -R $TESTPOOL $vdev:$offset:$lsize:dr 2> /dev/null) +result=${#output} +(( $result != $blksize)) && log_fail \ +"zdb -R failed to decompress the data (length ${#output} != $blksize)" + +# Specifying psize will decompress successfully , but not always to full +# lsize since zdb has to guess lsize incrementally. +output=$(zdb -R $TESTPOOL $vdev:$offset:$psize:dr 2> /dev/null) +result=${#output} +# convert psize to decimal +psize_orig=$psize +psize=${psize%?} +psize=$((16#$psize)) +(( $result < $psize)) && log_fail \ +"zdb -R failed to decompress the data with psize $psize_orig\ + (length ${#output} < $psize)" + +log_pass "zdb -R :d flag (decompress) works as expected" From 856d185dc291ab8e4213ed81fd30b676651e37f0 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 11 Dec 2019 15:38:21 -0800 Subject: [PATCH 287/325] Fix use-after-free of vd_path in spa_vdev_remove() After spa_vdev_remove_aux() is called, the config nvlist is no longer valid, as it's been replaced by the new one (with the specified device removed). Therefore any pointers into the nvlist are no longer valid. So we can't save the result of `fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH)` (in vd_path) across the call to spa_vdev_remove_aux(). Instead, use spa_strdup() to save a copy of the string before calling spa_vdev_remove_aux. 
Found by AddressSanitizer: ERROR: AddressSanitizer: heap-use-after-free on address ... READ of size 34 at 0x608000a1fcd0 thread T686 #0 0x7fe88b0c166d (/usr/lib/x86_64-linux-gnu/libasan.so.4+0x5166d) #1 0x7fe88a5acd6e in spa_strdup spa_misc.c:1447 #2 0x7fe88a688034 in spa_vdev_remove vdev_removal.c:2259 #3 0x55ffbc7748f8 in ztest_vdev_aux_add_remove ztest.c:3229 #4 0x55ffbc769fba in ztest_execute ztest.c:6714 #5 0x55ffbc779a90 in ztest_thread ztest.c:6761 #6 0x7fe889cbc6da in start_thread #7 0x7fe8899e588e in __clone 0x608000a1fcd0 is located 48 bytes inside of 88-byte region freed by thread T686 here: #0 0x7fe88b14e7b8 in __interceptor_free #1 0x7fe88ae541c5 in nvlist_free nvpair.c:874 #2 0x7fe88ae543ba in nvpair_free nvpair.c:844 #3 0x7fe88ae57400 in nvlist_remove_nvpair nvpair.c:978 #4 0x7fe88a683c81 in spa_vdev_remove_aux vdev_removal.c:185 #5 0x7fe88a68857c in spa_vdev_remove vdev_removal.c:2221 #6 0x55ffbc7748f8 in ztest_vdev_aux_add_remove ztest.c:3229 #7 0x55ffbc769fba in ztest_execute ztest.c:6714 #8 0x55ffbc779a90 in ztest_thread ztest.c:6761 #9 0x7fe889cbc6da in start_thread Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Matthew Ahrens Closes #9706 --- module/zfs/vdev_removal.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 5dba2fb6989..1ec18e7cf30 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -2136,7 +2136,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) int error = 0, error_log; boolean_t locked = MUTEX_HELD(&spa_namespace_lock); sysevent_t *ev = NULL; - char *vd_type = NULL, *vd_path = NULL, *vd_path_log = NULL; + char *vd_type = NULL, *vd_path = NULL; ASSERT(spa_writeable(spa)); @@ -2171,7 +2171,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) ESC_ZFS_VDEV_REMOVE_AUX); vd_type = VDEV_TYPE_SPARE; - vd_path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + vd_path 
= spa_strdup(fnvlist_lookup_string( + nv, ZPOOL_CONFIG_PATH)); spa_vdev_remove_aux(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares, nv); spa_load_spares(spa); @@ -2184,7 +2185,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { vd_type = VDEV_TYPE_L2CACHE; - vd_path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + vd_path = spa_strdup(fnvlist_lookup_string( + nv, ZPOOL_CONFIG_PATH)); /* * Cache devices can always be removed. */ @@ -2197,7 +2199,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) } else if (vd != NULL && vd->vdev_islog) { ASSERT(!locked); vd_type = VDEV_TYPE_LOG; - vd_path = (vd->vdev_path != NULL) ? vd->vdev_path : "-"; + vd_path = spa_strdup((vd->vdev_path != NULL) ? + vd->vdev_path : "-"); error = spa_vdev_remove_log(vd, &txg); } else if (vd != NULL) { ASSERT(!locked); @@ -2209,9 +2212,6 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) error = SET_ERROR(ENOENT); } - if (vd_path != NULL) - vd_path_log = spa_strdup(vd_path); - error_log = error; if (!locked) @@ -2224,12 +2224,12 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) * Doing that would prevent the txg sync from actually happening, * causing a deadlock. 
*/ - if (error_log == 0 && vd_type != NULL && vd_path_log != NULL) { + if (error_log == 0 && vd_type != NULL && vd_path != NULL) { spa_history_log_internal(spa, "vdev remove", NULL, - "%s vdev (%s) %s", spa_name(spa), vd_type, vd_path_log); + "%s vdev (%s) %s", spa_name(spa), vd_type, vd_path); } - if (vd_path_log != NULL) - spa_strfree(vd_path_log); + if (vd_path != NULL) + spa_strfree(vd_path); if (ev != NULL) spa_event_post(ev); From 7ad0ae91d53c8b144c680e519d638fd6873edb08 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Fri, 13 Dec 2019 14:51:39 -0500 Subject: [PATCH 288/325] Allow empty ds_props_obj to be destroyed Currently, 'zfs list' and 'zfs get' commands can be slow when working with snapshots that have a ds_props_obj. This is because the code that discovers all of the properties for these snapshots needs to read this object for each snapshot, which almost always ends up causing an extra random synchronous read for each snapshot. This performance penalty exists even if the properties on that snapshot have been unset because the object is normally only freed when the snapshot is freed, even though it is only created when it is needed. This patch allows the user to regain 'zfs list' performance on these snapshots by destroying the ds_props_obj when it no longer has any entries left. In practice on a production machine, this optimization seems to make 'zfs list' about 55% faster. 
Reviewed-by: Brian Behlendorf Reviewed-by: Paul Zuchowski Signed-off-by: Tom Caputi Closes #9704 --- module/zfs/dsl_prop.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 9f892acdbf8..784a7308b08 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -649,7 +649,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t zapobj, intval, dummy; + uint64_t zapobj, intval, dummy, count; int isint; char valbuf[32]; const char *valstr = NULL; @@ -663,7 +663,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, if (ds->ds_is_snapshot) { ASSERT(version >= SPA_VERSION_SNAP_PROPS); - if (dsl_dataset_phys(ds)->ds_props_obj == 0) { + if (dsl_dataset_phys(ds)->ds_props_obj == 0 && + (source & ZPROP_SRC_NONE) == 0) { dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_props_obj = zap_create(mos, @@ -674,6 +675,10 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, zapobj = dsl_dir_phys(ds->ds_dir)->dd_props_zapobj; } + /* If we are removing objects from a non-existent ZAP just return */ + if (zapobj == 0) + return; + if (version < SPA_VERSION_RECVD_PROPS) { if (source & ZPROP_SRC_NONE) source = ZPROP_SRC_NONE; @@ -755,6 +760,18 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, strfree(inheritstr); strfree(recvdstr); + /* + * If we are left with an empty snap zap we can destroy it. + * This will prevent unnecessary calls to zap_lookup() in + * the "zfs list" and "zfs get" code paths. 
+ */ + if (ds->ds_is_snapshot && + zap_count(mos, zapobj, &count) == 0 && count == 0) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_dataset_phys(ds)->ds_props_obj = 0; + zap_destroy(mos, zapobj, tx); + } + if (isint) { VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval)); From 6455859ee756444e65eb2b659ff009f8a33276eb Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sat, 14 Dec 2019 08:02:23 +0900 Subject: [PATCH 289/325] Don't fail to apply umask for O_TMPFILE files Apply umask to `mode` which will eventually be applied to inode. This is needed since VFS doesn't apply umask for O_TMPFILE files. (Note that zpl_init_acl() applies `ip->i_mode &= ~current_umask();` only when POSIX ACL is used.) Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Tomohiro Kusumi Closes #8997 Closes #8998 --- module/zfs/zpl_inode.c | 6 + tests/runfiles/linux.run | 3 +- .../tests/functional/tmpfile/.gitignore | 1 + .../tests/functional/tmpfile/Makefile.am | 3 +- .../functional/tmpfile/tmpfile_stat_mode.c | 121 ++++++++++++++++++ 5 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 3f3b2e2dc53..5660f8b0e56 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -226,6 +226,12 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); + /* + * The VFS does not apply the umask, therefore it is applied here + * when POSIX ACLs are not enabled. 
+ */ + if (!IS_POSIXACL(dir)) + mode &= ~current_umask(); zpl_vap_init(vap, dir, mode, cr); cookie = spl_fstrans_mark(); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 6579cc748f9..0d0ad720367 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -860,7 +860,8 @@ tests = ['threadsappend_001_pos'] tags = ['functional', 'threadsappend'] [tests/functional/tmpfile] -tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos'] +tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos', + 'tmpfile_stat_mode'] tags = ['functional', 'tmpfile'] [tests/functional/trim] diff --git a/tests/zfs-tests/tests/functional/tmpfile/.gitignore b/tests/zfs-tests/tests/functional/tmpfile/.gitignore index b7a19481ad2..de014c5256c 100644 --- a/tests/zfs-tests/tests/functional/tmpfile/.gitignore +++ b/tests/zfs-tests/tests/functional/tmpfile/.gitignore @@ -2,3 +2,4 @@ /tmpfile_001_pos /tmpfile_002_pos /tmpfile_003_pos +/tmpfile_stat_mode diff --git a/tests/zfs-tests/tests/functional/tmpfile/Makefile.am b/tests/zfs-tests/tests/functional/tmpfile/Makefile.am index 411445217a6..35a1f44c169 100644 --- a/tests/zfs-tests/tests/functional/tmpfile/Makefile.am +++ b/tests/zfs-tests/tests/functional/tmpfile/Makefile.am @@ -8,7 +8,8 @@ dist_pkgdata_SCRIPTS = \ pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/tmpfile -pkgexec_PROGRAMS = tmpfile_test tmpfile_001_pos tmpfile_002_pos tmpfile_003_pos +pkgexec_PROGRAMS = tmpfile_test tmpfile_001_pos tmpfile_002_pos \ + tmpfile_003_pos tmpfile_stat_mode tmpfile_test_SOURCES= tmpfile_test.c tmpfile_001_pos_SOURCES = tmpfile_001_pos.c tmpfile_002_pos_SOURCES = tmpfile_002_pos.c diff --git a/tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c new file mode 100644 index 00000000000..bf71d429c3f --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * 
The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +/* backward compat in case it's not defined */ +#ifndef O_TMPFILE +#define O_TMPFILE (020000000|O_DIRECTORY) +#endif + +/* + * DESCRIPTION: + * Verify stat(2) for O_TMPFILE file considers umask. + * + * STRATEGY: + * 1. open(2) with O_TMPFILE. + * 2. linkat(2). + * 3. fstat(2)/stat(2) and verify .st_mode value. + */ + +static void +test_stat_mode(mode_t mask) +{ + struct stat st, fst; + int i, fd; + char spath[1024], dpath[1024]; + char *penv[] = {"TESTDIR", "TESTFILE0"}; + mode_t masked = 0777 & ~mask; + mode_t mode; + + /* + * Get the environment variable values. 
+ */ + for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { + if ((penv[i] = getenv(penv[i])) == NULL) { + fprintf(stderr, "getenv(penv[%d])\n", i); + exit(1); + } + } + + umask(mask); + fd = open(penv[0], O_RDWR|O_TMPFILE, 0777); + if (fd == -1) { + perror("open"); + exit(2); + } + + if (fstat(fd, &fst) == -1) { + perror("fstat"); + close(fd); + exit(3); + } + + snprintf(spath, sizeof (spath), "/proc/self/fd/%d", fd); + snprintf(dpath, sizeof (dpath), "%s/%s", penv[0], penv[1]); + + unlink(dpath); + if (linkat(AT_FDCWD, spath, AT_FDCWD, dpath, AT_SYMLINK_FOLLOW) == -1) { + perror("linkat"); + close(fd); + exit(4); + } + close(fd); + + if (stat(dpath, &st) == -1) { + perror("stat"); + exit(5); + } + unlink(dpath); + + /* Verify fstat(2) result */ + mode = fst.st_mode & 0777; + if (mode != masked) { + fprintf(stderr, "fstat(2) %o != %o\n", mode, masked); + exit(6); + } + + /* Verify stat(2) result */ + mode = st.st_mode & 0777; + if (mode != masked) { + fprintf(stderr, "stat(2) %o != %o\n", mode, masked); + exit(7); + } +} + +int +main(int argc, char *argv[]) +{ + fprintf(stdout, "Verify stat(2) for O_TMPFILE file considers umask.\n"); + + test_stat_mode(0022); + test_stat_mode(0077); + + return (0); +} From 0f256176d9c782faf08bb238f9bb1f975b4792fd Mon Sep 17 00:00:00 2001 From: Richard Laager Date: Mon, 16 Dec 2019 18:54:51 -0600 Subject: [PATCH 290/325] initramfs: setup keymapping and video for prompts From Steve Langasek : > The poorly-named 'FRAMEBUFFER' option in initramfs-tools controls > whether the console_setup and plymouth scripts are included and used > in the initramfs. These are required for any initramfs which will be > prompting for user input: console_setup because without it the user's > configured keymap will not be set up, and plymouth because you are > not guaranteed to have working video output in the initramfs without > it (e.g. some nvidia+UEFI configurations with the default GRUB > behavior). 
> The zfs initramfs script may need to prompt the user for passphrases > for encrypted zfs datasets, and we don't know definitively whether > this is the case or not at the time the initramfs is constructed (and > it's difficult to dynamically populate initramfs config variables > anyway), therefore the zfs-initramfs package should just set > FRAMEBUFFER=yes in a conf snippet the same way that the > cryptsetup-initramfs package does > (/usr/share/initramfs-tools/conf-hooks.d/cryptsetup). https://bugs.launchpad.net/ubuntu/+source/zfs-linux/+bug/1856408 Reviewed-by: Brian Behlendorf Reviewed-by: Kjeld Schouten Signed-off-by: Steve Langasek Signed-off-by: Richard Laager Closes #9723 --- contrib/initramfs/conf-hooks.d/zfs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/contrib/initramfs/conf-hooks.d/zfs b/contrib/initramfs/conf-hooks.d/zfs index 29950cac04b..b86d36223e3 100644 --- a/contrib/initramfs/conf-hooks.d/zfs +++ b/contrib/initramfs/conf-hooks.d/zfs @@ -1,2 +1,9 @@ # Force the inclusion of Busybox in the initramfs. BUSYBOX=y + +# Setup the keyboard mapping so passphrases can be entered correctly. +KEYMAP=y + +# Require the plymouth script to guarantee working video for the passphrase +# prompting. +FRAMEBUFFER=y From fb244566c29fc478d08b7852a4c52187ed3f3463 Mon Sep 17 00:00:00 2001 From: Garrett Fields Date: Tue, 17 Dec 2019 20:45:06 -0500 Subject: [PATCH 291/325] Force systems with kernel option "quiet" to display prompt for password On systems that utilize TTY for password entry, if the kernel option "quiet" is set, the system would appear to freeze on a blank screen, when in fact it is waiting for password entry from the user. Since TTY is the fallback method, this has no effect on systemd or plymouth password prompting. By temporarily setting "printk" to "7", running the command, then resuming with the original "printk" state, the user can see the password prompt. 
Reviewed-by: Richard Laager Reviewed-by: Brian Behlendorf Signed-off-by: Garrett Fields Closes #9731 --- contrib/initramfs/scripts/zfs.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 523694473a6..5540160621b 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -436,7 +436,11 @@ decrypt_fs() # Prompt with ZFS tty, otherwise else + # Setting "printk" temporarily to "7" will allow prompt even if kernel option "quiet" + storeprintk="$(awk '{print $1}' /proc/sys/kernel/printk)" + echo 7 > /proc/sys/kernel/printk $ZFS load-key "${ENCRYPTIONROOT}" + echo "$storeprintk" > /proc/sys/kernel/printk fi fi fi From 7f7c15c6781bd3d7e9b4bf1eecb14fd24485accf Mon Sep 17 00:00:00 2001 From: Thomas Geppert Date: Wed, 18 Dec 2019 02:50:20 +0100 Subject: [PATCH 292/325] Create symbolic links in /dev/disk/by-vdev for nvme disk devices The existing rules miss nvme disk devices because of the trailing digits in the KERNEL device name, e.g. nvme0n1. Partitions of nvme disk devices are already properly handled by the existing rule for ENV{DEVTYPE}=="partition". 
Reviewed-by: Brian Behlendorf Reviewed-by: Kjeld Schouten Signed-off-by: Thomas Geppert Closes #9730 --- udev/rules.d/69-vdev.rules.in | 1 + 1 file changed, 1 insertion(+) diff --git a/udev/rules.d/69-vdev.rules.in b/udev/rules.d/69-vdev.rules.in index 36a1a8ed545..e0f23efc728 100644 --- a/udev/rules.d/69-vdev.rules.in +++ b/udev/rules.d/69-vdev.rules.in @@ -6,6 +6,7 @@ ENV{DEVTYPE}=="disk", IMPORT{program}="@udevdir@/vdev_id -d %k" ENV{DEVTYPE}=="partition", IMPORT{program}="@udevdir@/vdev_id -d %k" KERNEL=="*[!0-9]", ENV{SUBSYSTEM}=="block", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}" +KERNEL=="nvme*[0-9]n*[0-9]", ENV{SUBSYSTEM}=="block", ENV{DEVTYPE}=="disk", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}" KERNEL=="*[0-9]", ENV{SUBSYSTEM}=="block", ENV{DEVTYPE}=="partition", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}-part%n" KERNEL=="dm-[0-9]*", ENV{SUBSYSTEM}=="block", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}" From 78072b7936a00df460666097e46837e0e6fccd35 Mon Sep 17 00:00:00 2001 From: Garrett Fields Date: Wed, 18 Dec 2019 15:32:31 -0500 Subject: [PATCH 293/325] Exchanged two "${ZFS} get -H -o value" commands Initramfs uses "get_fs_value()" elsewhere. Reviewed-by: Brian Behlendorf Reviewed-by: Richard Laager Reviewed-by: Kjeld Schouten-Lebbing Signed-off-by: Garrett Fields Closes #9736 --- contrib/initramfs/scripts/zfs.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 5540160621b..4b04c4be4d4 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -410,11 +410,11 @@ decrypt_fs() if [ "$(zpool list -H -o feature@encryption $(echo "${fs}" | awk -F\/ '{print $1}'))" = 'active' ]; then # Determine dataset that holds key for root dataset - ENCRYPTIONROOT=$(${ZFS} get -H -o value encryptionroot "${fs}") + ENCRYPTIONROOT="$(get_fs_value "${fs}" encryptionroot)" # If root dataset is encrypted... if ! 
[ "${ENCRYPTIONROOT}" = "-" ]; then - KEYSTATUS="$(${ZFS} get -H -o value keystatus "${ENCRYPTIONROOT}")" + KEYSTATUS="$(get_fs_value "${ENCRYPTIONROOT}" keystatus)" # Continue only if the key needs to be loaded [ "$KEYSTATUS" = "unavailable" ] || return 0 TRY_COUNT=3 From bf01567e4e4a438adba9e523d34d79f22797e238 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 13 Dec 2019 23:56:37 +0000 Subject: [PATCH 294/325] cppcheck: (error) Uninitialized variable Resolve the following uninitialized variable warnings. In practice these were unreachable due to the goto. Replacing the goto with a return resolves the warning and yields more readable code. [module/icp/algs/modes/ccm.c:892]: (error) Uninitialized variable: ccm_param [module/icp/algs/modes/ccm.c:893]: (error) Uninitialized variable: ccm_param [module/icp/algs/modes/gcm.c:564]: (error) Uninitialized variable: gcm_param [module/icp/algs/modes/gcm.c:565]: (error) Uninitialized variable: gcm_param [module/icp/algs/modes/gcm.c:599]: (error) Uninitialized variable: gmac_param [module/icp/algs/modes/gcm.c:600]: (error) Uninitialized variable: gmac_param Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- module/icp/algs/modes/ccm.c | 7 ++----- module/icp/algs/modes/gcm.c | 10 ++++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c index fb41194f817..f4075f50394 100644 --- a/module/icp/algs/modes/ccm.c +++ b/module/icp/algs/modes/ccm.c @@ -885,15 +885,13 @@ ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, ccm_ctx->ccm_flags |= CCM_MODE; } else { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (ccm_init(ccm_ctx, ccm_param->nonce, ccm_param->ulNonceSize, ccm_param->authData, ccm_param->ulAuthDataSize, block_size, encrypt_block, xor_block) != 0) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (!is_encrypt_init) { /* 
allocate buffer for storing decrypted plaintext */ @@ -903,7 +901,6 @@ ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, rv = CRYPTO_HOST_MEMORY; } } -out: return (rv); } diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index efbf0fea969..014e90ceff8 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -553,8 +553,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GCM_MODE; } else { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, @@ -562,7 +561,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } -out: + return (rv); } @@ -588,8 +587,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GMAC_MODE; } else { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, @@ -597,7 +595,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } -out: + return (rv); } From 363d7332f2a16b63429c7a0ef06cbb309c7495d4 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 14 Dec 2019 00:07:48 +0000 Subject: [PATCH 295/325] cppcheck: (error) Uninitialized variable As of cppcheck 1.82 warnings are issued when using the list_for_each_* functions with an uninitialized variable. Functionally, this is fine but to resolve the warning initialize these variables. 
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- module/spl/spl-generic.c | 2 +- module/spl/spl-kmem-cache.c | 13 +++++++------ module/spl/spl-kmem.c | 4 ++-- module/spl/spl-kstat.c | 6 +++--- module/spl/spl-proc.c | 4 ++-- module/spl/spl-taskq.c | 12 ++++++------ module/spl/spl-tsd.c | 2 +- module/spl/spl-vmem.c | 2 +- 8 files changed, 23 insertions(+), 22 deletions(-) diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index 1deb2f444cd..c90ea81f29e 100644 --- a/module/spl/spl-generic.c +++ b/module/spl/spl-generic.c @@ -649,7 +649,7 @@ static void __init spl_random_init(void) { uint64_t s[2]; - int i; + int i = 0; get_random_bytes(s, sizeof (s)); diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index 9acff541449..7baf56de6f9 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -312,7 +312,7 @@ static spl_kmem_slab_t * spl_slab_alloc(spl_kmem_cache_t *skc, int flags) { spl_kmem_slab_t *sks; - spl_kmem_obj_t *sko, *n; + spl_kmem_obj_t *sko; void *base, *obj; uint32_t obj_size, offslab_size = 0; int i, rc = 0; @@ -356,6 +356,7 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags) out: if (rc) { + spl_kmem_obj_t *n = NULL; if (skc->skc_flags & KMC_OFFSLAB) list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { @@ -405,8 +406,8 @@ spl_slab_free(spl_kmem_slab_t *sks, static void spl_slab_reclaim(spl_kmem_cache_t *skc) { - spl_kmem_slab_t *sks, *m; - spl_kmem_obj_t *sko, *n; + spl_kmem_slab_t *sks = NULL, *m = NULL; + spl_kmem_obj_t *sko = NULL, *n = NULL; LIST_HEAD(sks_list); LIST_HEAD(sko_list); uint32_t size = 0; @@ -802,7 +803,7 @@ spl_magazine_free(spl_kmem_magazine_t *skm) static int spl_magazine_create(spl_kmem_cache_t *skc) { - int i; + int i = 0; if (skc->skc_flags & KMC_NOMAGAZINE) return (0); @@ -833,7 +834,7 @@ static void spl_magazine_destroy(spl_kmem_cache_t *skc) { spl_kmem_magazine_t *skm; - int i; + int i = 0; if (skc->skc_flags & KMC_NOMAGAZINE) return; @@ 
-1617,7 +1618,7 @@ static spl_shrinker_t __spl_kmem_cache_generic_shrinker(struct shrinker *shrink, struct shrink_control *sc) { - spl_kmem_cache_t *skc; + spl_kmem_cache_t *skc = NULL; int alloc = 0; /* diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 824b5e89f50..cee69ad4346 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -303,7 +303,7 @@ kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, const void *addr) { struct hlist_head *head; - struct hlist_node *node; + struct hlist_node *node = NULL; struct kmem_debug *p; unsigned long flags; @@ -500,7 +500,7 @@ static void spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) { unsigned long flags; - kmem_debug_t *kd; + kmem_debug_t *kd = NULL; char str[17]; spin_lock_irqsave(lock, flags); diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c index 1f67bf157f0..c97b6d6cbcb 100644 --- a/module/spl/spl-kstat.c +++ b/module/spl/spl-kstat.c @@ -431,7 +431,7 @@ static struct seq_operations kstat_seq_ops = { static kstat_module_t * kstat_find_module(char *name) { - kstat_module_t *module; + kstat_module_t *module = NULL; list_for_each_entry(module, &kstat_module_list, ksm_module_list) { if (strncmp(name, module->ksm_name, KSTAT_STRLEN) == 0) @@ -624,7 +624,7 @@ static int kstat_detect_collision(kstat_proc_entry_t *kpep) { kstat_module_t *module; - kstat_proc_entry_t *tmp; + kstat_proc_entry_t *tmp = NULL; char *parent; char *cp; @@ -659,7 +659,7 @@ kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, const struct file_operations *file_ops, void *data) { kstat_module_t *module; - kstat_proc_entry_t *tmp; + kstat_proc_entry_t *tmp = NULL; ASSERT(kpep); diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c index 13eaa6301d7..c0c13913cdf 100644 --- a/module/spl/spl-proc.c +++ b/module/spl/spl-proc.c @@ -144,7 +144,7 @@ proc_doslab(struct ctl_table *table, int write, int rc = 0; unsigned long min = 0, max = ~0, val = 0, mask; spl_ctl_table dummy = 
*table; - spl_kmem_cache_t *skc; + spl_kmem_cache_t *skc = NULL; dummy.data = &val; dummy.proc_handler = &proc_dointvec; @@ -249,7 +249,7 @@ static int taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag) { taskq_t *tq = p; - taskq_thread_t *tqt; + taskq_thread_t *tqt = NULL; spl_wait_queue_entry_t *wq; struct task_struct *tsk; taskq_ent_t *tqe; diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c index a39f94e4cc2..a65c95615db 100644 --- a/module/spl/spl-taskq.c +++ b/module/spl/spl-taskq.c @@ -82,7 +82,7 @@ task_km_flags(uint_t flags) static int taskq_find_by_name(const char *name) { - struct list_head *tql; + struct list_head *tql = NULL; taskq_t *tq; list_for_each_prev(tql, &tq_list) { @@ -211,7 +211,7 @@ task_expire_impl(taskq_ent_t *t) { taskq_ent_t *w; taskq_t *tq = t->tqent_taskq; - struct list_head *l; + struct list_head *l = NULL; unsigned long flags; spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); @@ -298,7 +298,7 @@ static void taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt) { taskq_thread_t *w; - struct list_head *l; + struct list_head *l = NULL; ASSERT(tq); ASSERT(tqt); @@ -321,7 +321,7 @@ taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt) static taskq_ent_t * taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id) { - struct list_head *l; + struct list_head *l = NULL; taskq_ent_t *t; list_for_each(l, lh) { @@ -347,7 +347,7 @@ static taskq_ent_t * taskq_find(taskq_t *tq, taskqid_t id) { taskq_thread_t *tqt; - struct list_head *l; + struct list_head *l = NULL; taskq_ent_t *t; t = taskq_find_list(tq, &tq->tq_delay_list, id); @@ -1198,7 +1198,7 @@ param_set_taskq_kick(const char *val, struct kernel_param *kp) #endif { int ret; - taskq_t *tq; + taskq_t *tq = NULL; taskq_ent_t *t; unsigned long flags; diff --git a/module/spl/spl-tsd.c b/module/spl/spl-tsd.c index 14342d5a618..b955ed65470 100644 --- a/module/spl/spl-tsd.c +++ b/module/spl/spl-tsd.c @@ -98,7 +98,7 @@ static tsd_hash_table_t 
*tsd_hash_table = NULL; static tsd_hash_entry_t * tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid) { - struct hlist_node *node; + struct hlist_node *node = NULL; tsd_hash_entry_t *entry; tsd_hash_bin_t *bin; ulong_t hash; diff --git a/module/spl/spl-vmem.c b/module/spl/spl-vmem.c index e1a84a9117b..a2630ecdd18 100644 --- a/module/spl/spl-vmem.c +++ b/module/spl/spl-vmem.c @@ -50,7 +50,7 @@ EXPORT_SYMBOL(zio_arena); size_t vmem_size(vmem_t *vmp, int typemask) { - spl_kmem_cache_t *skc; + spl_kmem_cache_t *skc = NULL; size_t alloc = VMEM_FLOOR_SIZE; if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE)) From 603ae6a8c0895e1e46d556312bab7b777104949f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 14 Dec 2019 00:16:42 +0000 Subject: [PATCH 296/325] cppcheck: (error) Shifting signed 64-bit value by 63 bits As of cppcheck 1.82 surpress the warning regarding shifting too many bits for __divdi3() implemention. The algorithm used here is correct. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- module/spl/spl-generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index c90ea81f29e..92f059a9054 100644 --- a/module/spl/spl-generic.c +++ b/module/spl/spl-generic.c @@ -273,7 +273,9 @@ int64_t __divdi3(int64_t u, int64_t v) { int64_t q, t; + // cppcheck-suppress shiftTooManyBitsSigned q = __udivdi3(abs64(u), abs64(v)); + // cppcheck-suppress shiftTooManyBitsSigned t = (u ^ v) >> 63; // If u, v have different return ((q ^ t) - t); // signs, negate q. } From 1074834f770064470142fa32190864312b686fcc Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 16 Dec 2019 10:40:29 -0800 Subject: [PATCH 297/325] cppcheck: (error) Memory leak: vtoc Resolve the reported memory leak by using a dedicated local vptr variable to store the pointer reported by calloc(). Only assign the passed **vtoc function argument on success, in all other cases vptr is freed. 
[lib/libefi/rdwr_efi.c:403]: (error) Memory leak: vtoc [lib/libefi/rdwr_efi.c:422]: (error) Memory leak: vtoc [lib/libefi/rdwr_efi.c:440]: (error) Memory leak: vtoc [lib/libefi/rdwr_efi.c:454]: (error) Memory leak: vtoc [lib/libefi/rdwr_efi.c:470]: (error) Memory leak: vtoc Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- lib/libefi/rdwr_efi.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 5311059ee81..d880011c7a5 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -399,10 +399,11 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) length = sizeof (struct dk_gpt) + sizeof (struct dk_part) * (nparts - 1); - if ((*vtoc = calloc(1, length)) == NULL) + vptr = calloc(1, length); + if (vptr == NULL) return (-1); - vptr = *vtoc; + *vtoc = vptr; vptr->efi_version = EFI_VERSION_CURRENT; vptr->efi_lbasize = lbsize; @@ -431,30 +432,32 @@ efi_alloc_and_read(int fd, struct dk_gpt **vtoc) int rval; uint32_t nparts; int length; + struct dk_gpt *vptr; /* figure out the number of entries that would fit into 16K */ nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t); length = (int) sizeof (struct dk_gpt) + (int) sizeof (struct dk_part) * (nparts - 1); - if ((*vtoc = calloc(1, length)) == NULL) + vptr = calloc(1, length); + + if (vptr == NULL) return (VT_ERROR); - (*vtoc)->efi_nparts = nparts; - rval = efi_read(fd, *vtoc); + vptr->efi_nparts = nparts; + rval = efi_read(fd, vptr); - if ((rval == VT_EINVAL) && (*vtoc)->efi_nparts > nparts) { + if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) { void *tmp; length = (int) sizeof (struct dk_gpt) + - (int) sizeof (struct dk_part) * - ((*vtoc)->efi_nparts - 1); - nparts = (*vtoc)->efi_nparts; - if ((tmp = realloc(*vtoc, length)) == NULL) { - free (*vtoc); + (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1); + nparts = vptr->efi_nparts; + if ((tmp = realloc(vptr, length)) == 
NULL) { + free(vptr); *vtoc = NULL; return (VT_ERROR); } else { - *vtoc = tmp; - rval = efi_read(fd, *vtoc); + vptr = tmp; + rval = efi_read(fd, vptr); } } @@ -463,8 +466,10 @@ efi_alloc_and_read(int fd, struct dk_gpt **vtoc) (void) fprintf(stderr, "read of EFI table failed, rval=%d\n", rval); } - free (*vtoc); + free(vptr); *vtoc = NULL; + } else { + *vtoc = vptr; } return (rval); From d01290f44da1bbdc9e520692a1b3ee5001ba762c Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 16 Dec 2019 10:55:11 -0800 Subject: [PATCH 298/325] cppcheck: (warning) Possible null pointer dereference: dnp The dnp argument can only be set to NULL when the DNODE_DRY_RUN flag is set. In which case, an early return path will be executed and a NULL pointer dereference at the given location is impossible. Add an additional ASSERT to silence the cppcheck warning and document that dbp must never be NULL at the point in the function. [module/zfs/dnode.c:1566]: (warning) Possible null pointer deref: dnp Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- module/zfs/dnode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index ec297a242b2..7acfc36c87d 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1561,6 +1561,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, dnode_slots_rele(dnc, idx, slots); DNODE_VERIFY(dn); + ASSERT3P(dnp, !=, NULL); ASSERT3P(dn->dn_dbuf, ==, db); ASSERT3U(dn->dn_object, ==, object); dbuf_rele(db, FTAG); From 1c27877ab2535b0a809f83383e727a6a8c893be1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 16 Dec 2019 15:49:28 -0800 Subject: [PATCH 299/325] cppcheck: (error) Null pointer dereference: who_perm As indicated by the VERIFY the local who_perm variable can never be NULL in parse_fs_perm(). Due to the existence of the is_set conditional, which is always true, cppcheck 1.88 was reporting a possible NULL reference. 
Resolve the issue by removing the extraneous is_set variable. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- cmd/zfs/zfs_main.c | 90 +++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index ced60a690e1..fa1c6aa3028 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -4896,7 +4896,6 @@ parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) zfs_deleg_who_type_t perm_type = name[0]; char perm_locality = name[1]; const char *perm_name = name + 3; - boolean_t is_set = B_TRUE; who_perm_t *who_perm = NULL; assert('$' == name[2]); @@ -4926,57 +4925,56 @@ parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) assert(!"unhandled zfs_deleg_who_type_t"); } - if (is_set) { - who_perm_node_t *found_node = NULL; - who_perm_node_t *node = safe_malloc( - sizeof (who_perm_node_t)); - who_perm = &node->who_perm; - uu_avl_index_t idx = 0; - - uu_avl_node_init(node, &node->who_avl_node, avl_pool); - who_perm_init(who_perm, fsperm, perm_type, perm_name); - - if ((found_node = uu_avl_find(avl, node, NULL, &idx)) - == NULL) { - if (avl == fsperm->fsp_uge_avl) { - uid_t rid = 0; - struct passwd *p = NULL; - struct group *g = NULL; - const char *nice_name = NULL; - - switch (perm_type) { - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_USER: - rid = atoi(perm_name); - p = getpwuid(rid); - if (p) - nice_name = p->pw_name; - break; - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_GROUP: - rid = atoi(perm_name); - g = getgrgid(rid); - if (g) - nice_name = g->gr_name; - break; + who_perm_node_t *found_node = NULL; + who_perm_node_t *node = safe_malloc( + sizeof (who_perm_node_t)); + who_perm = &node->who_perm; + uu_avl_index_t idx = 0; - default: - break; - } + uu_avl_node_init(node, &node->who_avl_node, avl_pool); + who_perm_init(who_perm, fsperm, perm_type, perm_name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) { + if (avl == fsperm->fsp_uge_avl) { + uid_t 
rid = 0; + struct passwd *p = NULL; + struct group *g = NULL; + const char *nice_name = NULL; + + switch (perm_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + rid = atoi(perm_name); + p = getpwuid(rid); + if (p) + nice_name = p->pw_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + rid = atoi(perm_name); + g = getgrgid(rid); + if (g) + nice_name = g->gr_name; + break; - if (nice_name != NULL) - (void) strlcpy( - node->who_perm.who_ug_name, - nice_name, 256); + default: + break; } - uu_avl_insert(avl, node, idx); - } else { - node = found_node; - who_perm = &node->who_perm; + if (nice_name != NULL) + (void) strlcpy( + node->who_perm.who_ug_name, + nice_name, 256); } + + uu_avl_insert(avl, node, idx); + } else { + node = found_node; + who_perm = &node->who_perm; } - VERIFY3P(who_perm, !=, NULL); + + assert(who_perm != NULL); (void) parse_who_perm(who_perm, nvl2, perm_locality); } From 180c41e0b7efcf261814e88864857a18fe5901bb Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 16 Dec 2019 15:53:43 -0800 Subject: [PATCH 300/325] cppcheck: (error) Address of local auto-variable assigned Suppress autoVariables warnings in the lua interpreter. The usage here while unconventional in intentional and the same as upstream. [module/lua/ldebug.c:327]: (error) Address of local auto-variable assigned to a function parameter. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- module/lua/ldebug.c | 1 + module/lua/ldo.c | 1 + 2 files changed, 2 insertions(+) diff --git a/module/lua/ldebug.c b/module/lua/ldebug.c index 15fe91b0b76..32bb908cd50 100644 --- a/module/lua/ldebug.c +++ b/module/lua/ldebug.c @@ -324,6 +324,7 @@ static void kname (Proto *p, int pc, int c, const char **name) { if (ISK(c)) { /* is 'c' a constant? */ TValue *kvalue = &p->k[INDEXK(c)]; if (ttisstring(kvalue)) { /* literal constant? 
*/ + // cppcheck-suppress autoVariables *name = svalue(kvalue); /* it is its own name */ return; } diff --git a/module/lua/ldo.c b/module/lua/ldo.c index 59d0b6a2c29..d550cb5bfdb 100644 --- a/module/lua/ldo.c +++ b/module/lua/ldo.c @@ -168,6 +168,7 @@ int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) { struct lua_longjmp lj; lj.status = LUA_OK; lj.previous = L->errorJmp; /* chain new error handler */ + // cppcheck-suppress autoVariables L->errorJmp = &lj; LUAI_TRY(L, &lj, (*f)(L, ud); From 9791683901a1471026c7ebf3df798e2676e118ef Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 16 Dec 2019 16:03:31 -0800 Subject: [PATCH 301/325] cppcheck: (warning) Possible null pointer dereference: nvh Move the 'nvh = (void *)buf' assignment after the 'buf == NULL' check to resolve the warning. Interestingly, cppcheck 1.88 correctly determines that the existing code is safe, while cppcheck 1.86 reports the warning. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9732 --- module/nvpair/nvpair.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c index c5bd98ebd05..a47b94c48e0 100644 --- a/module/nvpair/nvpair.c +++ b/module/nvpair/nvpair.c @@ -2558,7 +2558,7 @@ nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, #else int host_endian = 0; #endif /* _LITTLE_ENDIAN */ - nvs_header_t *nvh = (void *)buf; + nvs_header_t *nvh; if (buflen == NULL || nvl == NULL || (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) @@ -2577,6 +2577,7 @@ nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, if (buf == NULL || *buflen < sizeof (nvs_header_t)) return (EINVAL); + nvh = (void *)buf; nvh->nvh_encoding = encoding; nvh->nvh_endian = nvl_endian = host_endian; nvh->nvh_reserved1 = 0; @@ -2588,6 +2589,7 @@ nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, return (EINVAL); /* get method of encoding from first byte */ + nvh = (void *)buf; 
encoding = nvh->nvh_encoding; nvl_endian = nvh->nvh_endian; break; From e05c965d5bac00b9036046d26ea3b9665b65da68 Mon Sep 17 00:00:00 2001 From: loli10K Date: Sat, 28 Dec 2019 07:28:37 -0600 Subject: [PATCH 302/325] Fix for ARC sysctls ignored at runtime This change leverage module_param_call() to run arc_tuning_update() immediately after the ARC tunable has been updated as suggested in cffa837 code review. A simple test case is added to the ZFS Test Suite to prevent future regressions in functionality. This is a backport of #9489 provided from: https://github.com/zfsonlinux/zfs/pull/9776#issuecomment-569418370 Signed-off-by: loli10K --- module/zfs/arc.c | 72 +++++++++++++++---- tests/runfiles/linux.run | 3 +- .../tests/functional/arc/Makefile.am | 1 + .../arc/arcstats_runtime_tuning.ksh | 46 ++++++++++++ tests/zfs-tests/tests/perf/perf.shlib | 17 +++++ 5 files changed, 124 insertions(+), 15 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 03097cd83dc..a16689dc6b0 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -296,6 +296,7 @@ #include #include #include +#include #endif #include #include @@ -7554,8 +7555,10 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) /* * Called during module initialization and periodically thereafter to - * apply reasonable changes to the exposed performance tunings. Non-zero - * zfs_* values which differ from the currently set values will be applied. + * apply reasonable changes to the exposed performance tunings. Can also be + * called explicitly by param_set_arc_*() functions when ARC tunables are + * updated manually. Non-zero zfs_* values which differ from the currently set + * values will be applied. 
*/ static void arc_tuning_update(void) @@ -9389,6 +9392,35 @@ l2arc_stop(void) } #if defined(_KERNEL) +static int +param_set_arc_long(const char *buf, zfs_kernel_param_t *kp) +{ + int error; + + error = param_set_long(buf, kp); + if (error < 0) + return (SET_ERROR(error)); + + arc_tuning_update(); + + return (0); +} + +static int +param_set_arc_int(const char *buf, zfs_kernel_param_t *kp) +{ + int error; + + error = param_set_int(buf, kp); + if (error < 0) + return (SET_ERROR(error)); + + arc_tuning_update(); + + return (0); +} + + EXPORT_SYMBOL(arc_buf_size); EXPORT_SYMBOL(arc_write); EXPORT_SYMBOL(arc_read); @@ -9398,20 +9430,25 @@ EXPORT_SYMBOL(arc_add_prune_callback); EXPORT_SYMBOL(arc_remove_prune_callback); /* BEGIN CSTYLED */ -module_param(zfs_arc_min, ulong, 0644); +module_param_call(zfs_arc_min, param_set_arc_long, param_get_long, + &zfs_arc_min, 0644); MODULE_PARM_DESC(zfs_arc_min, "Min arc size"); -module_param(zfs_arc_max, ulong, 0644); +module_param_call(zfs_arc_max, param_set_arc_long, param_get_long, + &zfs_arc_max, 0644); MODULE_PARM_DESC(zfs_arc_max, "Max arc size"); -module_param(zfs_arc_meta_limit, ulong, 0644); +module_param_call(zfs_arc_meta_limit, param_set_arc_long, param_get_long, + &zfs_arc_meta_limit, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); -module_param(zfs_arc_meta_limit_percent, ulong, 0644); +module_param_call(zfs_arc_meta_limit_percent, param_set_arc_long, + param_get_long, &zfs_arc_meta_limit_percent, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit_percent, "Percent of arc size for arc meta limit"); -module_param(zfs_arc_meta_min, ulong, 0644); +module_param_call(zfs_arc_meta_min, param_set_arc_long, param_get_long, + &zfs_arc_meta_min, 0644); MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata"); module_param(zfs_arc_meta_prune, int, 0644); @@ -9424,20 +9461,23 @@ MODULE_PARM_DESC(zfs_arc_meta_adjust_restarts, module_param(zfs_arc_meta_strategy, int, 0644); MODULE_PARM_DESC(zfs_arc_meta_strategy, "Meta 
reclaim strategy"); -module_param(zfs_arc_grow_retry, int, 0644); +module_param_call(zfs_arc_grow_retry, param_set_arc_int, param_get_int, + &zfs_arc_grow_retry, 0644); MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size"); module_param(zfs_arc_p_dampener_disable, int, 0644); MODULE_PARM_DESC(zfs_arc_p_dampener_disable, "disable arc_p adapt dampener"); -module_param(zfs_arc_shrink_shift, int, 0644); +module_param_call(zfs_arc_shrink_shift, param_set_arc_int, param_get_int, + &zfs_arc_shrink_shift, 0644); MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)"); module_param(zfs_arc_pc_percent, uint, 0644); MODULE_PARM_DESC(zfs_arc_pc_percent, "Percent of pagecache to reclaim arc to"); -module_param(zfs_arc_p_min_shift, int, 0644); +module_param_call(zfs_arc_p_min_shift, param_set_arc_int, param_get_int, + &zfs_arc_p_min_shift, 0644); MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p"); module_param(zfs_arc_average_blocksize, int, 0444); @@ -9446,7 +9486,8 @@ MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size"); module_param(zfs_compressed_arc_enabled, int, 0644); MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers"); -module_param(zfs_arc_min_prefetch_ms, int, 0644); +module_param_call(zfs_arc_min_prefetch_ms, param_set_arc_int, param_get_int, + &zfs_arc_min_prefetch_ms, 0644); MODULE_PARM_DESC(zfs_arc_min_prefetch_ms, "Min life of prefetch block in ms"); module_param(zfs_arc_min_prescient_prefetch_ms, int, 0644); @@ -9480,14 +9521,17 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup"); module_param(l2arc_norw, int, 0644); MODULE_PARM_DESC(l2arc_norw, "No reads during writes"); -module_param(zfs_arc_lotsfree_percent, int, 0644); +module_param_call(zfs_arc_lotsfree_percent, param_set_arc_int, param_get_int, + &zfs_arc_lotsfree_percent, 0644); MODULE_PARM_DESC(zfs_arc_lotsfree_percent, "System free memory I/O throttle in bytes"); 
-module_param(zfs_arc_sys_free, ulong, 0644); +module_param_call(zfs_arc_sys_free, param_set_arc_long, param_get_long, + &zfs_arc_sys_free, 0644); MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes"); -module_param(zfs_arc_dnode_limit, ulong, 0644); +module_param_call(zfs_arc_dnode_limit, param_set_arc_long, param_get_long, + &zfs_arc_dnode_limit, 0644); MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc"); module_param(zfs_arc_dnode_limit_percent, ulong, 0644); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 0d0ad720367..1361a8b53ea 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -33,7 +33,8 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', tags = ['functional', 'alloc_class'] [tests/functional/arc] -tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos'] +tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos', + 'arcstats_runtime_tuning'] tags = ['functional', 'arc'] [tests/functional/atime] diff --git a/tests/zfs-tests/tests/functional/arc/Makefile.am b/tests/zfs-tests/tests/functional/arc/Makefile.am index 22704fa5181..809d0346f87 100644 --- a/tests/zfs-tests/tests/functional/arc/Makefile.am +++ b/tests/zfs-tests/tests/functional/arc/Makefile.am @@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/arc dist_pkgdata_SCRIPTS = \ cleanup.ksh \ setup.ksh \ + arcstats_runtime_tuning.ksh \ dbufstats_001_pos.ksh \ dbufstats_002_pos.ksh \ dbufstats_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh b/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh new file mode 100755 index 00000000000..6d007aecf84 --- /dev/null +++ b/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh @@ -0,0 +1,46 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), 
version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/perf/perf.shlib + +function cleanup +{ + # Set tunables to their recorded actual size and then to their original + # value: this works for previously unconfigured tunables. + log_must set_tunable64 zfs_arc_min "$MINSIZE" + log_must set_tunable64 zfs_arc_min "$ZFS_ARC_MIN" + log_must set_tunable64 zfs_arc_max "$MAXSIZE" + log_must set_tunable64 zfs_arc_max "$ZFS_ARC_MAX" +} + +log_onexit cleanup + +ZFS_ARC_MAX="$(get_tunable zfs_arc_max)" +ZFS_ARC_MIN="$(get_tunable zfs_arc_min)" +MINSIZE="$(get_min_arc_size)" +MAXSIZE="$(get_max_arc_size)" + +log_assert "ARC tunables should be updated dynamically" + +for size in $((MAXSIZE/4)) $((MAXSIZE/3)) $((MAXSIZE/2)) $MAXSIZE; do + log_must set_tunable64 zfs_arc_max "$size" + log_must test "$(get_max_arc_size)" == "$size" + log_must set_tunable64 zfs_arc_min "$size" + log_must test "$(get_min_arc_size)" == "$size" +done + +log_pass "ARC tunables can be updated dynamically" diff --git a/tests/zfs-tests/tests/perf/perf.shlib b/tests/zfs-tests/tests/perf/perf.shlib index 69e61e9fd12..e2e84ca02ac 100644 --- a/tests/zfs-tests/tests/perf/perf.shlib +++ b/tests/zfs-tests/tests/perf/perf.shlib @@ -373,6 +373,23 @@ function get_directory echo $directory } +function get_min_arc_size +{ + if is_linux; then + typeset -l min_arc_size=`awk '$1 == "c_min" { print $3 }' \ + /proc/spl/kstat/zfs/arcstats` + else + typeset -l min_arc_size=$(dtrace -qn 'BEGIN { + printf("%u\n", `arc_stats.arcstat_c_min.value.ui64); + exit(0); + }') + fi + + [[ $? 
-eq 0 ]] || log_fail "get_min_arc_size failed" + + echo $min_arc_size +} + function get_max_arc_size { if is_linux; then From b051968de39a22bc9a163dccf9e7b7d4a4706edd Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 19 Dec 2019 15:32:56 -0800 Subject: [PATCH 303/325] ZTS: Various test case fixes * devices_001_pos and devices_002_neg - Failing after FreeBSD ZTS merged due to missing 'function' keyword for create_dev_file_linux. * pool_state - Occasionally fails due to an insufficient delay before checking 'zpool status'. Increasing the delay from 1 to 3 seconds resolved the issue in local testing. * procfs_list_basic - Fails when run in-tree because the logged command is actually 'lt-zfs'. Updated the regex accordingly. Reviewed-by: John Kennedy Reviewed-by: Tony Hutter Reviewed-by: Ryan Moeller Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9748 --- tests/zfs-tests/tests/functional/procfs/pool_state.ksh | 2 +- tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/tests/functional/procfs/pool_state.ksh b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh index a3afe0c429d..6543f87c46d 100755 --- a/tests/zfs-tests/tests/functional/procfs/pool_state.ksh +++ b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh @@ -137,7 +137,7 @@ remove_disk $SDISK # background since the command will hang when the pool gets suspended. The # command will resume and exit after we restore the missing disk later on. 
zpool scrub $TESTPOOL2 & -sleep 1 # Give the scrub some time to run before we check if it fails +sleep 3 # Give the scrub some time to run before we check if it fails log_must check_all $TESTPOOL2 "SUSPENDED" diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh index c9eff3649ca..88911aac6e8 100755 --- a/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh +++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh @@ -48,7 +48,7 @@ function cleanup function count_snap_cmds { typeset expected_count=$1 - count=$(grep "command: zfs snapshot $FS@testsnapshot" | wc -l) + count=$(grep -E "command: (lt-)?zfs snapshot $FS@testsnapshot" | wc -l) log_must eval "[[ $count -eq $expected_count ]]" } From f28e58b479c3a9b8cecbdeae0e7bd1633d6e11e5 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 23 Dec 2019 14:24:36 -0800 Subject: [PATCH 304/325] Update maximum kernel version to 5.4 Increase the maximum supported kernel version to 5.4. This was verified using the Fedora 5.4.2-300.fc31.x86_64 kernel. Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9754 Closes #9759 --- META | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/META b/META index 960a2b73ab3..8d2d6e8abb5 100644 --- a/META +++ b/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS on Linux -Linux-Maximum: 5.3 +Linux-Maximum: 5.4 Linux-Minimum: 2.6.32 From 421f8a2be0d7507f6783b6ffe0f7c5760668ffaf Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 26 Dec 2019 10:49:07 -0800 Subject: [PATCH 305/325] ZTS: Test case failures * large_dnode_008_pos - Force a pool sync before invoking zdb to ensure the updated dnode blocks have been persisted to disk. * refreserv_raidz - Wait for the /dev/zvol links to be both created and removed, this is important because the same device volume names are being used repeatedly. 
* btree_test - Add missing .gitignore file for btree_test binary. Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9769 --- tests/zfs-tests/cmd/btree_test/.gitignore | 1 + .../functional/features/large_dnode/large_dnode_008_pos.ksh | 3 ++- tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 tests/zfs-tests/cmd/btree_test/.gitignore diff --git a/tests/zfs-tests/cmd/btree_test/.gitignore b/tests/zfs-tests/cmd/btree_test/.gitignore new file mode 100644 index 00000000000..73777c4c1f4 --- /dev/null +++ b/tests/zfs-tests/cmd/btree_test/.gitignore @@ -0,0 +1 @@ +/btree_test diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh index eac292cbe06..71e17517132 100755 --- a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh @@ -39,7 +39,7 @@ verify_runnable "both" function cleanup { - datasetexists $TEST_FS && log_must zfs destroy $TEST_FS + datasetexists $TEST_FS && destroy_dataset $TEST_FS } function verify_dnode_packing @@ -71,6 +71,7 @@ for ((i=0; i < 100; i++)); do done log_must wait +sync_pool $TESTPOOL verify_dnode_packing diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh index 7b1f84afe25..9f25242de60 100755 --- a/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh @@ -121,6 +121,7 @@ for parity in 1 2 3; do log_must test "$deltapct" -le $maxpct log_must_busy zfs destroy "$vol" + block_device_wait done log_must_busy zpool destroy "$TESTPOOL" From bb04f9c1956dfc4f18f887c6c0d2f6cf62dc3555 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 26 Dec 2019 10:50:23 -0800 
Subject: [PATCH 306/325] Cancel initialize and TRIM before vdev_metaslab_fini() Any running 'zpool initialize' or TRIM must be cancelled prior to the vdev_metaslab_fini() call in spa_vdev_remove_log() which will unload the metaslabs and set ms->ms_group == NULL. Reviewed-by: Igor Kozhukhov Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #8602 Closes #9751 --- module/zfs/vdev_removal.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 1ec18e7cf30..340de255720 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -1861,6 +1861,13 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) spa_vdev_config_exit(spa, NULL, *txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); + /* + * Cancel any initialize or TRIM which was in progress. + */ + vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED); + vdev_trim_stop_all(vd, VDEV_TRIM_CANCELED); + vdev_autotrim_stop_wait(vd); + /* * Evacuate the device. We don't hold the config lock as * writer since we need to do I/O but we do keep the @@ -1891,12 +1898,6 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) vdev_metaslab_fini(vd); spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG); - - /* Stop initializing and TRIM */ - vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED); - vdev_trim_stop_all(vd, VDEV_TRIM_CANCELED); - vdev_autotrim_stop_wait(vd); - *txg = spa_vdev_config_enter(spa); sysevent_t *ev = spa_event_create(spa, vd, NULL, From 0d55a0957fb0eb8df431f202b066b16698b78a07 Mon Sep 17 00:00:00 2001 From: Nick Black Date: Thu, 26 Dec 2019 13:52:14 -0500 Subject: [PATCH 307/325] libspl: declare aok extern in header Rather than defining a new instance of 'aok' in every compilation unit which includes this header, there is a single instance defined in zone.c, and the header now only declares an extern. 
Reviewed-by: Brian Behlendorf Reviewed-by: Paul Zuchowski Signed-off-by: Nick Black Closes #9752 --- lib/libspl/include/assert.h | 2 +- lib/libspl/zone.c | 2 ++ tests/zfs-tests/tests/functional/checksum/edonr_test.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index b7b406850f7..820519c00ae 100644 --- a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -34,7 +34,7 @@ #include #ifndef _KERNEL -int aok; +extern int aok; #endif static inline int diff --git a/lib/libspl/zone.c b/lib/libspl/zone.c index 5ca93b224d9..4a0e600ca3b 100644 --- a/lib/libspl/zone.c +++ b/lib/libspl/zone.c @@ -27,6 +27,8 @@ #include #include +int aok = 0; + zoneid_t getzoneid() { diff --git a/tests/zfs-tests/tests/functional/checksum/edonr_test.c b/tests/zfs-tests/tests/functional/checksum/edonr_test.c index a2a924e5d89..596ef2b3368 100644 --- a/tests/zfs-tests/tests/functional/checksum/edonr_test.c +++ b/tests/zfs-tests/tests/functional/checksum/edonr_test.c @@ -42,6 +42,8 @@ typedef enum boolean { B_FALSE, B_TRUE } boolean_t; typedef unsigned long long u_longlong_t; +int aok = 0; + /* * Test messages from: * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf From 5b8f560713701e7eda4506977f8b7d71df1b5096 Mon Sep 17 00:00:00 2001 From: sam-lunt Date: Thu, 26 Dec 2019 12:55:20 -0600 Subject: [PATCH 308/325] In initramfs, do not prompt if keylocation is "file://" If the encryption key is stored in a file, the initramfs should not prompt for the password. 
For example, this could be the case if the boot partition is stored on removable media that is only present at boot time Reviewed-by: Brian Behlendorf Reviewed-by: Garrett Fields Reviewed-by: Richard Laager Reviewed-by: Kjeld Schouten Signed-off-by: Sam Lunt Closes #9764 --- contrib/dracut/90zfs/zfs-load-key.sh.in | 19 +++++++++++++------ contrib/initramfs/scripts/zfs.in | 8 +++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/contrib/dracut/90zfs/zfs-load-key.sh.in b/contrib/dracut/90zfs/zfs-load-key.sh.in index 88f43b6edc6..4e945f14abb 100755 --- a/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -37,15 +37,22 @@ fi if [ "$(zpool list -H -o feature@encryption $(echo "${BOOTFS}" | awk -F\/ '{print $1}'))" = 'active' ]; then # if the root dataset has encryption enabled ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${BOOTFS}") + # where the key is stored (in a file or loaded via prompt) + KEYLOCATION=$(${ZFS} get -H -o value keylocation "${ENCRYPTIONROOT}") if ! [ "${ENCRYPTIONROOT}" = "-" ]; then KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")" # continue only if the key needs to be loaded [ "$KEYSTATUS" = "unavailable" ] || exit 0 - # decrypt them - TRY_COUNT=5 - while [ $TRY_COUNT -gt 0 ]; do - systemd-ask-password "Encrypted ZFS password for ${BOOTFS}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break - TRY_COUNT=$((TRY_COUNT - 1)) - done + # if key is stored in a file, do not prompt + if ! 
[ "${KEYLOCATION}" = "prompt" ]; then + zfs load-key "${ENCRYPTIONROOT}" + else + # decrypt them + TRY_COUNT=5 + while [ $TRY_COUNT -gt 0 ]; do + systemd-ask-password "Encrypted ZFS password for ${BOOTFS}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break + TRY_COUNT=$((TRY_COUNT - 1)) + done + fi fi fi diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 4b04c4be4d4..4bbdf53a77d 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -411,6 +411,7 @@ decrypt_fs() # Determine dataset that holds key for root dataset ENCRYPTIONROOT="$(get_fs_value "${fs}" encryptionroot)" + KEYLOCATION="$(get_fs_value "${ENCRYPTIONROOT}" keylocation)" # If root dataset is encrypted... if ! [ "${ENCRYPTIONROOT}" = "-" ]; then @@ -418,8 +419,13 @@ decrypt_fs() # Continue only if the key needs to be loaded [ "$KEYSTATUS" = "unavailable" ] || return 0 TRY_COUNT=3 + + # If key is stored in a file, do not prompt + if ! [ "${KEYLOCATION}" = "prompt" ]; then + $ZFS load-key "${ENCRYPTIONROOT}" + # Prompt with plymouth, if active - if [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then + elif [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then while [ $TRY_COUNT -gt 0 ]; do plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \ $ZFS load-key "${ENCRYPTIONROOT}" && break From da6a7f0239ac74f6a3aa9e2c8a344e07ea415888 Mon Sep 17 00:00:00 2001 From: Steve Mokris Date: Thu, 26 Dec 2019 13:57:05 -0500 Subject: [PATCH 309/325] Avoid some crashes when importing a pool with corrupt metadata - Skip invalid DVAs when importing pools in readonly mode (in addition to when the config is untrusted). - Upon encountering a DVA with a null VDEV, fail gracefully instead of panicking with a NULL pointer dereference. 
Reviewed-by: Pavel Zakharov Reviewed-by: Brian Behlendorf Signed-off-by: Steve Mokris Closes #9022 --- module/zfs/vdev_mirror.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 2f75fca827f..cf8402dcc80 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -282,10 +282,11 @@ vdev_mirror_map_init(zio_t *zio) } /* - * If we do not trust the pool config, some DVAs might be - * invalid or point to vdevs that do not exist. We skip them. + * If the pool cannot be written to, then infer that some + * DVAs might be invalid or point to vdevs that do not exist. + * We skip them. */ - if (!spa_trust_config(spa)) { + if (!spa_writeable(spa)) { ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); int j = 0; for (int i = 0; i < c; i++) { @@ -309,6 +310,13 @@ vdev_mirror_map_init(zio_t *zio) mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); mc->mc_offset = DVA_GET_OFFSET(&dva[c]); + if (mc->mc_vd == NULL) { + kmem_free(mm, vdev_mirror_map_size( + mm->mm_children)); + zio->io_vsd = NULL; + zio->io_error = ENXIO; + return (NULL); + } } } else { /* From 16777b7dee33893f4d724140044da1840ba848ea Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 Dec 2019 12:11:27 -0800 Subject: [PATCH 310/325] ZTS: devices_001_pos and devices_002_neg Update the devices_001_pos and devices_002_neg test cases such that the special block device file created is backed by a ZFS volume. Specifying a specific device allows the major and minor numbers to be easily determined. Furthermore, this avoids the potentially dangerous behavior of opening the first block device we happen to find under /dev/. 
Reviewed-by: Ryan Moeller Signed-off-by: Brian Behlendorf Closes #9773 --- .../functional/devices/devices_001_pos.ksh | 18 ++- .../functional/devices/devices_002_neg.ksh | 16 ++- .../functional/devices/devices_common.kshlib | 105 ++++++++---------- .../tests/functional/devices/setup.ksh | 2 +- 4 files changed, 68 insertions(+), 73 deletions(-) diff --git a/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh b/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh index ac031ed6a52..2f2802bc65a 100755 --- a/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh @@ -42,7 +42,7 @@ # 1. Create pool and file system. # 2. Set devices=on on this file system. # 3. Separately create block device file and character file. -# 4. Separately read from those two device files. +# 4. Separately read and write from those two device files. # 5. Check the return value, and make sure it succeeds. # @@ -55,12 +55,18 @@ log_onexit cleanup log_must zfs set devices=on $TESTPOOL/$TESTFS # -# Separately create block device file and character device file, then try to -# open them and make sure it succeed. +# Create block device file backed by a ZFS volume. +# Verify it can be opened, written, and read. # -create_dev_file b $TESTDIR/$TESTFILE1 -log_must dd if=$TESTDIR/$TESTFILE1 of=$TESTDIR/$TESTFILE1.out count=1 +create_dev_file b $TESTDIR/$TESTFILE1 $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL +log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE1.out1 count=1 bs=128k +log_must dd if=$TESTDIR/$TESTFILE1.out1 of=$TESTDIR/$TESTFILE1 count=1 bs=128k +log_must dd if=$TESTDIR/$TESTFILE1 of=$TESTDIR/$TESTFILE1.out2 count=1 bs=128k +log_must cmp $TESTDIR/$TESTFILE1.out1 $TESTDIR/$TESTFILE1.out2 + +# Create character device file backed by /dev/null +# Verify it can be opened and written. 
create_dev_file c $TESTDIR/$TESTFILE2 -log_must dd if=$TESTDIR/$TESTFILE2 of=$TESTDIR/$TESTFILE2.out count=1 +log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE2 count=1 bs=128k log_pass "Setting devices=on on file system and testing it pass." diff --git a/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh b/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh index ce25502b818..a768c4aa6b3 100755 --- a/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh @@ -42,7 +42,7 @@ # 1. Create pool and file system. # 2. Set devices=off on this file system. # 3. Separately create block device file and character file. -# 4. Separately read from those two device files. +# 4. Separately read and write from those two device files. # 5. Check the return value, and make sure it failed. # @@ -55,12 +55,16 @@ log_onexit cleanup log_must zfs set devices=off $TESTPOOL/$TESTFS # -# Separately create block device file and character device file, then try to -# open them and make sure it failed. +# Create block device file backed by a ZFS volume. +# Verify it cannot be opened, written, and read. # -create_dev_file b $TESTDIR/$TESTFILE1 -log_mustnot dd if=$TESTDIR/$TESTFILE1 of=$TESTDIR/$TESTFILE1.out count=1 +create_dev_file b $TESTDIR/$TESTFILE1 $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL +log_mustnot dd if=/dev/urandom of=$TESTDIR/$TESTFILE1 count=1 bs=128k +log_mustnot dd if=$TESTDIR/$TESTFILE1 of=/dev/null count=1 bs=128k + +# Create character device file backed by /dev/null +# Verify it cannot be opened and written. create_dev_file c $TESTDIR/$TESTFILE2 -log_mustnot dd if=$TESTDIR/$TESTFILE2 of=$TESTDIR/$TESTFILE2.out count=1 +log_mustnot dd if=/dev/urandom of=$TESTDIR/$TESTFILE2 count=1 bs=128k log_pass "Setting devices=off on file system and testing it pass." 
diff --git a/tests/zfs-tests/tests/functional/devices/devices_common.kshlib b/tests/zfs-tests/tests/functional/devices/devices_common.kshlib index 2c7df8d058c..fa7fdbecf5f 100644 --- a/tests/zfs-tests/tests/functional/devices/devices_common.kshlib +++ b/tests/zfs-tests/tests/functional/devices/devices_common.kshlib @@ -36,89 +36,74 @@ # # $1 device file type # $2 file name +# $3 device path (used for 'b' device type) # function create_dev_file { typeset filetype=$1 typeset filename=$2 + typeset devstr=$3 case $filetype in - b) - if is_linux; then - major=$(awk '/[hsv]d/ { print $1; exit }' \ - /proc/partitions) - minor=$(awk '/[hsv]d/ { print $2; exit }' \ - /proc/partitions) - log_must mknod $filename b $major $minor - return 0 - fi - - devtype=$(df -n / | awk '{print $3}') - case $devtype in - zfs) - rootpool=$(df / | \ - awk '{print $2}') - rootpool=${rootpool#\(} - rootpool=${rootpool%%/*} - - devstr=$(get_disklist $rootpool) - devstr=$(echo "$devstr" | \ - awk '{print $1}') - [[ -z $devstr ]] && \ - log_fail "Can not get block device file." - devstr=$DEV_DSKDIR/${devstr} - ;; - ufs) + b) + case $(uname) in + Linux) # - # Get the existing block device file in current system. - # And bring out the first one. + # stat(1) --format=FORMAT tokens + # %t - major device type in hex + # %T - minor device type in hex # - devstr=$(df-lhF ufs | \ - grep "^${DEV_DSKDIR}" | \ - awk '{print $1}') - devstr=$(echo "$devstr" | \ - awk '{print $1}') - [[ -z $devstr ]] && \ - log_fail "Can not get block device file." - ;; - *) - log_unsupported "Unsupported fstype " \ - "for / ($devtype)," \ - "only ufs|zfs is supported." - ;; - esac - + major=$(stat --dereference --format="%t" "$devstr") + minor=$(stat --dereference --format="%T" "$devstr") + log_must mknod $filename b "0x${major}" "0x${minor}" + ;; + *) # # Get the device file information. 
i.e: - # $DEV_DSKDIR/c0t0d0s0: block special (28/768) + # $devstr: block special (28/768) # devstr=$(file $devstr) - - # - # Bring out major and minor number. - # major=${devstr##*\(} major=${major%%/*} minor=${devstr##*/} minor=${minor%\)} - log_must mknod $filename b $major $minor ;; - c) + esac + ;; + c) + # + # Create device file '/dev/null', $devstr is unused. + # + case $(uname) in + Linux) + # + # stat(1) --format=FORMAT tokens + # %t - major device type in hex + # %T - minor device type in hex + # + major=$(stat --format="%t" /dev/null) + minor=$(stat --format="%T" /dev/null) + log_must mknod $filename c "0x${major}" "0x${minor}" + ;; + FreeBSD) # # Create device file '/dev/null' # - if is_linux; then - major=$(stat -c %t /dev/null) - minor=$(stat -c %T /dev/null) - log_must mknod $filename c $major $minor - else - log_must mknod $filename c $(getmajor mm) 2 - fi + major=13 + minor=2 + log_must mknod $filename b $major $minor ;; *) - log_fail "'$filetype' is wrong." + major=$(getmajor mm) + minor=2 + log_must mknod $filename b $major $minor ;; + esac + ;; + *) + log_fail "'$filetype' is wrong." + ;; esac return 0 @@ -129,6 +114,6 @@ function cleanup log_must zfs set devices=on $TESTPOOL/$TESTFS log_must rm -f $TESTDIR/$TESTFILE1 log_must rm -f $TESTDIR/$TESTFILE2 - log_must rm -f $TESTDIR/$TESTFILE1.out - log_must rm -f $TESTDIR/$TESTFILE2.out + log_must rm -f $TESTDIR/$TESTFILE1.out1 + log_must rm -f $TESTDIR/$TESTFILE1.out2 } diff --git a/tests/zfs-tests/tests/functional/devices/setup.ksh b/tests/zfs-tests/tests/functional/devices/setup.ksh index fc5cec3063a..ee6cf83acb9 100755 --- a/tests/zfs-tests/tests/functional/devices/setup.ksh +++ b/tests/zfs-tests/tests/functional/devices/setup.ksh @@ -32,4 +32,4 @@ . 
$STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} -default_setup $DISK +default_volume_setup $DISK From 9aec34703ee4f51bc23ef5b3ad7908e111386793 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 Dec 2019 12:12:41 -0800 Subject: [PATCH 311/325] ZTS: zfs_program_json As of Python 3.5 the default behavior of json.tool was changed to preserve the input order rather than lexical order. The test case expects the output to be sorted so apply the --sort-keys option to the json.tool command when using Python 3.5 and the option is supported. https://docs.python.org/3/library/json.html#module-json.tool Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9774 --- .../cli_root/zfs_program/zfs_program_json.ksh | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh index 3d59f784a48..3788543b0b2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh @@ -91,14 +91,28 @@ typeset -a pos_cmds_out=( } } }") + +# +# N.B. json.tool is needed to guarantee consistent ordering of fields, +# sed is needed to trim trailing space in CentOS 6's json.tool output +# +# As of Python 3.5 the behavior of json.tool changed to keep the order +# the same as the input and the --sort-keys option was added. Detect when +# --sort-keys is supported and apply the option to ensure the expected order. 
+# +if python -m json.tool --sort-keys <<< "{}"; then + JSON_TOOL_CMD="python -m json.tool --sort-keys" +else + JSON_TOOL_CMD="python -m json.tool" +fi + typeset -i cnt=0 typeset cmd for cmd in ${pos_cmds[@]}; do log_must zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 log_must zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 - # json.tool is needed to guarantee consistent ordering of fields - # sed is needed to trim trailing space in CentOS 6's json.tool output - OUTPUT=$(zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 | python -m json.tool | sed 's/[[:space:]]*$//') + OUTPUT=$(zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 | + $JSON_TOOL_CMD | sed 's/[[:space:]]*$//') if [ "$OUTPUT" != "${pos_cmds_out[$cnt]}" ]; then log_note "Got :$OUTPUT" log_note "Expected:${pos_cmds_out[$cnt]}" From 70d2e938b5443821fc48d99c08322a15c3a3c17e Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 Dec 2019 16:41:16 -0800 Subject: [PATCH 312/325] ZTS: Replace /var/tmp with $TEST_BASE_DIR Remove a few hardcoded instances of /var/tmp. This should use the $TEST_BASE_DIR in order to allow the ZTS to be optionally run in an alternate directory using `zfs-tests.sh -d `. 
Reviewed-by: Ryan Moeller Reviewed-by: John Kennedy Reviewed-by: Igor Kozhukhov Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9775 --- .../tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh | 2 +- .../tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh index b0a319d4193..2c6e3fdd6d2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh @@ -75,7 +75,7 @@ log_onexit cleanup init_snap=$TESTPOOL/$TESTFS@init_snap inc_snap=$TESTPOOL/$TESTFS@inc_snap full_bkup=$TEST_BASE_DIR/fullbkup.$$ -inc_bkup=/var/tmp/incbkup.$$ +inc_bkup=$TEST_BASE_DIR/incbkup.$$ init_data=$TESTDIR/$TESTFILE1 inc_data=$TESTDIR/$TESTFILE2 orig_sum="" diff --git a/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh index d881b831ffb..8e9009bd550 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh @@ -57,9 +57,8 @@ function cleanup log_onexit cleanup log_assert "'zfs list -d ' should get expected output." -mntpnt=/var/tmp -DEPTH_OUTPUT="$mntpnt/depth_output" -EXPECT_OUTPUT="$mntpnt/expect_output" +DEPTH_OUTPUT="$TEST_BASE_DIR/depth_output" +EXPECT_OUTPUT="$TEST_BASE_DIR/expect_output" typeset -i old_val=0 typeset -i j=0 typeset -i fs=0 From 756c58cf71e4e87d68265d5d5445ec3ff88763aa Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Sat, 28 Dec 2019 08:43:23 -0800 Subject: [PATCH 313/325] ZTS: Fix pool_state cleanup The externally faulted vdev should be brought back online and have its errors cleared before the pool is destroyed. 
Failure to do so will leave a vdev with a valid active label. This vdev may then not be used to create a new pool without the -f flag potentially leading to subsequent test failures. Additionally remove an unreachable log_pass from setup.ksh. Reviewed-by: John Kennedy Reviewed-by: Kjeld Schouten Signed-off-by: Brian Behlendorf Closes #9777 --- tests/zfs-tests/tests/functional/procfs/pool_state.ksh | 4 +++- tests/zfs-tests/tests/functional/procfs/setup.ksh | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/zfs-tests/tests/functional/procfs/pool_state.ksh b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh index 6543f87c46d..f4df839be63 100755 --- a/tests/zfs-tests/tests/functional/procfs/pool_state.ksh +++ b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh @@ -105,8 +105,10 @@ check_all $TESTPOOL "ONLINE" # Fault one of the disks, and check that pool is degraded DISK1=$(echo "$DISKS" | awk '{print $2}') -zpool offline -tf $TESTPOOL $DISK1 +log_must zpool offline -tf $TESTPOOL $DISK1 check_all $TESTPOOL "DEGRADED" +log_must zpool online $TESTPOOL $DISK1 +log_must zpool clear $TESTPOOL # Create a new pool out of a scsi_debug disk TESTPOOL2=testpool2 diff --git a/tests/zfs-tests/tests/functional/procfs/setup.ksh b/tests/zfs-tests/tests/functional/procfs/setup.ksh index b3812dbdc64..3444cfcf2f4 100755 --- a/tests/zfs-tests/tests/functional/procfs/setup.ksh +++ b/tests/zfs-tests/tests/functional/procfs/setup.ksh @@ -31,4 +31,3 @@ if ! is_linux ; then fi default_mirror_setup $DISKS -log_pass From c7dc6f3ab391294e1615dd2b830263eabdbf405d Mon Sep 17 00:00:00 2001 From: Ben Cordero Date: Sun, 29 Dec 2019 19:25:01 +0000 Subject: [PATCH 314/325] zfs-load-key.sh: ${ZFS} is not the zfs binary A change[1] was merged yesterday that should refer to the zfs binary in the initramfs, but is actually an unset shell variable. This commit changes this line to call `zfs` directly like the surrounding code. 
[1]: cb5b875b273235a4a3ed28e16f416d5bb8865166 Reviewed-by: Brian Behlendorf Reviewed-by: Garrett Fields Reviewed-by: Richard Laager Signed-off-by: Ben Cordero Closes #9780 --- contrib/dracut/90zfs/zfs-load-key.sh.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/dracut/90zfs/zfs-load-key.sh.in b/contrib/dracut/90zfs/zfs-load-key.sh.in index 4e945f14abb..85e55c51bfa 100755 --- a/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -38,7 +38,7 @@ if [ "$(zpool list -H -o feature@encryption $(echo "${BOOTFS}" | awk -F\/ '{prin # if the root dataset has encryption enabled ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${BOOTFS}") # where the key is stored (in a file or loaded via prompt) - KEYLOCATION=$(${ZFS} get -H -o value keylocation "${ENCRYPTIONROOT}") + KEYLOCATION=$(zfs get -H -o value keylocation "${ENCRYPTIONROOT}") if ! [ "${ENCRYPTIONROOT}" = "-" ]; then KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")" # continue only if the key needs to be loaded From 0a37abc206560d91290a34ec21abbbbc7e007bf0 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Mon, 30 Dec 2019 09:14:40 -0800 Subject: [PATCH 315/325] zdb: print block checksums with 6 d's of verbosity Include checksums in the output of 'zdb -dddddd' along with other indirect block information already displayed. 
Example output follows (with long lines trimmed): $ zdb -dddddd tank/fish 128 Dataset tank/fish [ZPL], ID 259, cr_txg 10, 16.2M, 93 objects, rootbp DV Object lvl iblk dblk dsize dnsize lsize %full type 128 2 128K 128K 634K 512 1M 100.00 ZFS plain f 168 bonus System attri dnode flags: USED_BYTES USERUSED_ACCOUNTED USEROBJUSED_ACCOUNTED dnode maxblkid: 7 path /c uid 0 gid 0 atime Sat Dec 21 10:49:26 2019 mtime Sat Dec 21 10:49:26 2019 ctime Sat Dec 21 10:49:26 2019 crtime Sat Dec 21 10:49:26 2019 gen 41 mode 100755 size 964592 parent 34 links 1 pflags 40800000104 Indirect blocks: 0 L1 0:2c0000:400 0:c021e00:400 20000L/400P F=8 B=41/41 0 L0 0:227800:13800 20000L/13800P F=1 B=41/41 cksum=167a 20000 L0 0:25ec00:17c00 20000L/17c00P F=1 B=41/41 cksum=2312 40000 L0 0:276800:18400 20000L/18400P F=1 B=41/41 cksum=24e0 60000 L0 0:2a7800:18800 20000L/18800P F=1 B=41/41 cksum=25be 80000 L0 0:28ec00:18c00 20000L/18c00P F=1 B=41/41 cksum=2579 a0000 L0 0:24d000:11c00 20000L/11c00P F=1 B=41/41 cksum=140a c0000 L0 0:23b000:12000 20000L/12000P F=1 B=41/41 cksum=164e e0000 L0 0:221e00:5a00 20000L/5a00P F=1 B=41/41 cksum=9de790 segment [0000000000000000, 0000000000100000) size 1M Reviewed-by: Kjeld Schouten Reviewed-by: Brian Behlendorf Signed-off-by: Ned Bass --- cmd/zdb/zdb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 92bfd3ecfe3..0182c79720b 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1450,6 +1450,12 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx", + (u_longlong_t)bp->blk_cksum.zc_word[0], + (u_longlong_t)bp->blk_cksum.zc_word[1], + (u_longlong_t)bp->blk_cksum.zc_word[2], + (u_longlong_t)bp->blk_cksum.zc_word[3]); } } From 82be30978059f59a2e56f6c0f2caf67c47ac741b Mon Sep 17 00:00:00 2001 From: Brian 
Behlendorf Date: Mon, 6 Jan 2020 11:14:19 -0800 Subject: [PATCH 316/325] ZTS: Cleanup partition tables The cleanup_devices function should remove any partitions created on the device and force the partition table to be reread. This is needed to ensure that blkid has an up to date version of what devices and partitions are used by zfs. The cleanup_devices call was removed from inuse_008_pos.ksh since it operated on partitions instead of devices and was not needed. Lastly ddidecode may be called by parted and was therefore added to the constrained path. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9806 --- tests/zfs-tests/include/commands.cfg | 1 + tests/zfs-tests/include/libtest.shlib | 7 ++++--- tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 0d75de9a233..4d98e7c11a6 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -33,6 +33,7 @@ export SYSTEM_FILES='arp diff dirname dmesg + dmidecode du echo egrep diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index b439b44f586..cd593b6f258 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -2202,10 +2202,11 @@ function cleanup_devices #vdevs { typeset pool="foopool$$" - if poolexists $pool ; then - destroy_pool $pool - fi + for vdev in $@; do + zero_partitions $vdev + done + poolexists $pool && destroy_pool $pool create_pool $pool $@ destroy_pool $pool diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh index 1f5510ae5e6..a08beb8b251 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh @@ -107,7 +107,6 @@ while (( i < ${#vdevs[*]} )); do create_pool $TESTPOOL1 
${vdevs[i]} $vslices spare $sslices log_must zpool export $TESTPOOL1 verify_assertion "$rawtargets" - cleanup_devices $vslices $sslices (( i = i + 1 )) done From 543b0c644a36a5804e65e6009db48d10e3dadfc3 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Tue, 7 Jan 2020 18:02:49 -0500 Subject: [PATCH 317/325] Fix zfs-0.8.3 'make lint' warnings Fix these lint warnings on zfs-0.8.3: $ make lint [module/spl/spl-vnode.c:494]: (error) Uninitialized variable: fp [module/spl/spl-vnode.c:706]: (error) Uninitialized variable: fp [module/spl/spl-vnode.c:706]: (error) Uninitialized variable: next_fp ^CMakefile:1632: recipe for target 'cppcheck' failed make: *** [cppcheck] Interrupt Signed-off-by: Tony Hutter --- module/spl/spl-vnode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c index d9056c964e5..032bd1aba9c 100644 --- a/module/spl/spl-vnode.c +++ b/module/spl/spl-vnode.c @@ -489,7 +489,7 @@ EXPORT_SYMBOL(vn_space); static file_t * file_find(int fd, struct task_struct *task) { - file_t *fp; + file_t *fp = NULL; list_for_each_entry(fp, &vn_file_list, f_list) { if (fd == fp->f_fd && fp->f_task == task) { @@ -698,7 +698,7 @@ spl_vn_init(void) void spl_vn_fini(void) { - file_t *fp, *next_fp; + file_t *fp = NULL, *next_fp = NULL; int leaked = 0; spin_lock(&vn_file_lock); From 7eaaa6f32ecdd63b5633ba54f1189cf5cf79aec3 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Thu, 2 Jan 2020 20:45:53 -0800 Subject: [PATCH 318/325] Fix zfs-0.8.3 zfs_receive_raw test case Fix the zfs_receive_raw test case for zfs-0.8.3 by including the one-liner fix from loli10k described here: https://github.com/zfsonlinux/zfs/pull/9776#issuecomment-570252679 Signed-off-by: Tony Hutter --- lib/libzfs/libzfs_sendrecv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index a9323579877..1875f79e7c3 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ 
-3518,6 +3518,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, 8), B_FALSE, NULL); break; + case DRR_OBJECT_RANGE: case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: From 0fd9a28de87ad2a2350d3160185446b08fbfa8f1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 6 Jan 2020 11:17:53 -0800 Subject: [PATCH 319/325] Fix QAT allocation failure return value When qat_compress() fails to allocate the required contiguous memory it mistakenly returns success. This prevents the fallback software compression from taking over and (un)compressing the block. Resolve the issue by correctly setting the local 'status' variable on all exit paths. Furthermore, initialize it to CPA_STATUS_FAIL to ensure qat_compress() always fails safe to guard against any similar bugs in the future. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #9784 Closes #9788 --- module/zfs/qat_compress.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index 46ccb997a3b..011358329f4 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -249,7 +249,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, Cpa8U *buffer_meta_dst = NULL; Cpa32U buffer_meta_size = 0; CpaDcRqResults dc_results; - CpaStatus status = CPA_STATUS_SUCCESS; + CpaStatus status = CPA_STATUS_FAIL; Cpa32U hdr_sz = 0; Cpa32U compressed_sz; Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2; @@ -278,16 +278,19 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) + ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer)); - if (QAT_PHYS_CONTIG_ALLOC(&in_pages, - num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&in_pages, + num_src_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) 
goto fail; - if (QAT_PHYS_CONTIG_ALLOC(&out_pages, - num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&out_pages, + num_dst_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) goto fail; - if (QAT_PHYS_CONTIG_ALLOC(&add_pages, - num_add_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&add_pages, + num_add_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) goto fail; i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; @@ -296,19 +299,19 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf, &buffer_meta_size); - if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size); + if (status != CPA_STATUS_SUCCESS) goto fail; cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf, &buffer_meta_size); - if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size); + if (status != CPA_STATUS_SUCCESS) goto fail; /* build source buffer list */ - if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size); + if (status != CPA_STATUS_SUCCESS) goto fail; flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1); @@ -316,8 +319,8 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, buf_list_src->pBuffers = flat_buf_src; /* always point to first one */ /* build destination buffer list */ - if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size); + if (status != CPA_STATUS_SUCCESS) goto fail; flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); From 
1be3cba3819f7f9b59b11601f6761028bf92c7ca Mon Sep 17 00:00:00 2001 From: jwpoduska Date: Wed, 27 Nov 2019 13:15:01 -0500 Subject: [PATCH 320/325] Prevent unnecessary resilver restarts If a device is participating in an active resilver, then it will have a non-empty DTL. Operations like vdev_{open,reopen,probe}() can cause the resilver to be restarted (or deferred to be restarted later), which is unnecessary if the DTL is still covered by the current scan range. This is similar to the logic in vdev_dtl_should_excise() where the DTL can only be excised if it's max txg is in the resilvered range. Reviewed-by: Brian Behlendorf Reviewed-by: John Gallagher Reviewed-by: Kjeld Schouten Signed-off-by: John Poduska Issue #840 Closes #9155 Closes #9378 Closes #9551 Closes #9588 --- configure.ac | 1 + include/sys/dsl_scan.h | 6 +- include/sys/spa.h | 3 +- include/sys/vdev.h | 4 +- module/zfs/dsl_scan.c | 100 +++++----- module/zfs/spa.c | 14 +- module/zfs/vdev.c | 76 ++++--- tests/runfiles/linux.run | 4 + tests/zfs-tests/tests/functional/Makefile.am | 1 + .../tests/functional/resilver/Makefile.am | 8 + .../tests/functional/resilver/cleanup.ksh | 31 +++ .../tests/functional/resilver/resilver.cfg | 32 +++ .../resilver/resilver_restart_001.ksh | 185 ++++++++++++++++++ .../tests/functional/resilver/setup.ksh | 31 +++ 14 files changed, 409 insertions(+), 87 deletions(-) create mode 100644 tests/zfs-tests/tests/functional/resilver/Makefile.am create mode 100755 tests/zfs-tests/tests/functional/resilver/cleanup.ksh create mode 100644 tests/zfs-tests/tests/functional/resilver/resilver.cfg create mode 100755 tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh create mode 100755 tests/zfs-tests/tests/functional/resilver/setup.ksh diff --git a/configure.ac b/configure.ac index 46a27f7f194..6fcc89044dd 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/rename_dirs/Makefile 
tests/zfs-tests/tests/functional/replacement/Makefile tests/zfs-tests/tests/functional/reservation/Makefile + tests/zfs-tests/tests/functional/resilver/Makefile tests/zfs-tests/tests/functional/rootpool/Makefile tests/zfs-tests/tests/functional/rsend/Makefile tests/zfs-tests/tests/functional/scrub_mirror/Makefile diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index 345d2754fb6..032f7f3e2d1 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. */ #ifndef _SYS_DSL_SCAN_H @@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); int dsl_scan_cancel(struct dsl_pool *); int dsl_scan(struct dsl_pool *, pool_scan_func_t); +void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd); boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd); -void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); +void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg); boolean_t dsl_scan_resilvering(struct dsl_pool *dp); +boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_entry_t *dde, dmu_tx_t *tx); diff --git a/include/sys/spa.h b/include/sys/spa.h index 42bf9dcc104..4c1dcdcc133 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -26,7 +26,7 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. 
*/ @@ -777,6 +777,7 @@ extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); extern void spa_async_suspend(spa_t *spa); extern void spa_async_resume(spa_t *spa); +extern int spa_async_tasks(spa_t *spa); extern spa_t *spa_inject_addref(char *pool); extern void spa_inject_delref(spa_t *spa); extern void spa_scan_stat_init(spa_t *spa); diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 67ca0d11614..339a48861c1 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. */ #ifndef _SYS_VDEV_H @@ -151,7 +152,8 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg); extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); -extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd); +extern void vdev_defer_resilver(vdev_t *vd); +extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx); typedef enum vdev_config_flag { VDEV_CONFIG_SPARE = 1 << 0, diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index f1d995b3d3a..d7111368123 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -22,7 +22,7 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2016 Gary Mills - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright 2019 Joyent, Inc. 
*/ @@ -591,6 +591,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx) scn->scn_restart_txg <= tx->tx_txg); } +boolean_t +dsl_scan_resilver_scheduled(dsl_pool_t *dp) +{ + return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) || + (spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER)); +} + boolean_t dsl_scan_scrubbing(const dsl_pool_t *dp) { @@ -786,7 +793,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) (void) spa_vdev_state_exit(spa, NULL, 0); if (func == POOL_SCAN_RESILVER) { - dsl_resilver_restart(spa->spa_dsl_pool, 0); + dsl_scan_restart_resilver(spa->spa_dsl_pool, 0); return (0); } @@ -806,41 +813,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } -/* - * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns - * B_TRUE if we have devices that need to be resilvered and are available to - * accept resilver I/Os. - */ -static boolean_t -dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx) -{ - boolean_t resilver_needed = B_FALSE; - spa_t *spa = vd->vdev_spa; - - for (int c = 0; c < vd->vdev_children; c++) { - resilver_needed |= - dsl_scan_clear_deferred(vd->vdev_child[c], tx); - } - - if (vd == spa->spa_root_vdev && - spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) { - spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx); - vdev_config_dirty(vd); - spa->spa_resilver_deferred = B_FALSE; - return (resilver_needed); - } - - if (!vdev_is_concrete(vd) || vd->vdev_aux || - !vd->vdev_ops->vdev_op_leaf) - return (resilver_needed); - - if (vd->vdev_resilver_deferred) - vd->vdev_resilver_deferred = B_FALSE; - - return (!vdev_is_dead(vd) && !vd->vdev_offline && - vdev_resilver_needed(vd, NULL, NULL)); -} - /* ARGSUSED */ static void dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) @@ -943,25 +915,21 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); /* - * Clear any deferred_resilver 
flags in the config. + * Clear any resilver_deferred flags in the config. * If there are drives that need resilvering, kick * off an asynchronous request to start resilver. - * dsl_scan_clear_deferred() may update the config + * vdev_clear_resilver_deferred() may update the config * before the resilver can restart. In the event of * a crash during this period, the spa loading code * will find the drives that need to be resilvered - * when the machine reboots and start the resilver then. + * and start the resilver then. */ - if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) { - boolean_t resilver_needed = - dsl_scan_clear_deferred(spa->spa_root_vdev, tx); - if (resilver_needed) { - spa_history_log_internal(spa, - "starting deferred resilver", tx, - "errors=%llu", - (u_longlong_t)spa_get_errlog_size(spa)); - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) && + vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) { + spa_history_log_internal(spa, + "starting deferred resilver", tx, "errors=%llu", + (u_longlong_t)spa_get_errlog_size(spa)); + spa_async_request(spa, SPA_ASYNC_RESILVER); } } @@ -1071,7 +1039,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) /* start a new scan, or restart an existing one. */ void -dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) +dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg) { if (txg == 0) { dmu_tx_t *tx; @@ -4232,6 +4200,36 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp) dsl_scan_freed_dva(spa, bp, i); } +/* + * Check if a vdev needs resilvering (non-empty DTL), if so, and resilver has + * not started, start it. Otherwise, only restart if max txg in DTL range is + * greater than the max txg in the current scan. If the DTL max is less than + * the scan max, then the vdev has not missed any new data since the resilver + * started, so a restart is not needed. 
+ */ +void +dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd) +{ + uint64_t min, max; + + if (!vdev_resilver_needed(vd, &min, &max)) + return; + + if (!dsl_scan_resilvering(dp)) { + spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER); + return; + } + + if (max <= dp->dp_scan->scn_phys.scn_max_txg) + return; + + /* restart is needed, check if it can be deferred */ + if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) + vdev_defer_resilver(vd); + else + spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER); +} + #if defined(_KERNEL) /* CSTYLED */ module_param(zfs_scan_vdev_limit, ulong, 0644); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7fa18cbd1de..cf8462cec33 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -29,7 +29,7 @@ * Copyright 2016 Toomas Soome * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright 2018 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. 
*/ @@ -6242,9 +6242,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ if (dsl_scan_resilvering(spa_get_dsl(spa)) && spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, newvd); + vdev_defer_resilver(newvd); else - dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); + dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg); if (spa->spa_bootfs) spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH); @@ -7479,7 +7479,7 @@ spa_async_thread(void *arg) if (tasks & SPA_ASYNC_RESILVER && (!dsl_scan_resilvering(dp) || !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))) - dsl_resilver_restart(dp, 0); + dsl_scan_restart_resilver(dp, 0); if (tasks & SPA_ASYNC_INITIALIZE_RESTART) { mutex_enter(&spa_namespace_lock); @@ -7595,6 +7595,12 @@ spa_async_request(spa_t *spa, int task) mutex_exit(&spa->spa_async_lock); } +int +spa_async_tasks(spa_t *spa) +{ + return (spa->spa_async_tasks); +} + /* * ========================================================================== * SPA syncing routines diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 952b565819e..a68c0dfa737 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -27,6 +27,7 @@ * Copyright 2016 Toomas Soome * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. */ #include @@ -833,7 +834,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_resilver_txg); if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); + vdev_defer_resilver(vd); /* * In general, when importing a pool we want to ignore the @@ -1863,18 +1864,12 @@ vdev_open(vdev_t *vd) } /* - * If a leaf vdev has a DTL, and seems healthy, then kick off a - * resilver. But don't do this if we are doing a reopen for a scrub, - * since this would just restart the scrub we are already doing. 
+ * If this is a leaf vdev, assess whether a resilver is needed. + * But don't do this if we are doing a reopen for a scrub, since + * this would just restart the scrub we are already doing. */ - if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && - vdev_resilver_needed(vd, NULL, NULL)) { - if (dsl_scan_resilvering(spa->spa_dsl_pool) && - spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); - else - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen) + dsl_scan_assess_vdev(spa->spa_dsl_pool, vd); return (0); } @@ -3693,14 +3688,11 @@ vdev_clear(spa_t *spa, vdev_t *vd) if (vd != rvd && vdev_writeable(vd->vdev_top)) vdev_state_dirty(vd->vdev_top); - if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) { - if (dsl_scan_resilvering(spa->spa_dsl_pool) && - spa_feature_is_enabled(spa, - SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); - else - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + /* If a resilver isn't required, check if vdevs can be culled */ + if (vd->vdev_aux == NULL && !vdev_is_dead(vd) && + !dsl_scan_resilvering(spa->spa_dsl_pool) && + !dsl_scan_resilver_scheduled(spa->spa_dsl_pool)) + spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR); } @@ -4693,18 +4685,46 @@ vdev_deadman(vdev_t *vd, char *tag) } void -vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd) +vdev_defer_resilver(vdev_t *vd) { - for (uint64_t i = 0; i < vd->vdev_children; i++) - vdev_set_deferred_resilver(spa, vd->vdev_child[i]); + ASSERT(vd->vdev_ops->vdev_op_leaf); - if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) || - range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { - return; + vd->vdev_resilver_deferred = B_TRUE; + vd->vdev_spa->spa_resilver_deferred = B_TRUE; +} + +/* + * Clears the resilver deferred flag on all leaf devs under vd. 
Returns + * B_TRUE if we have devices that need to be resilvered and are available to + * accept resilver I/Os. + */ +boolean_t +vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx) +{ + boolean_t resilver_needed = B_FALSE; + spa_t *spa = vd->vdev_spa; + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + resilver_needed |= vdev_clear_resilver_deferred(cvd, tx); } - vd->vdev_resilver_deferred = B_TRUE; - spa->spa_resilver_deferred = B_TRUE; + if (vd == spa->spa_root_vdev && + spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) { + spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + vdev_config_dirty(vd); + spa->spa_resilver_deferred = B_FALSE; + return (resilver_needed); + } + + if (!vdev_is_concrete(vd) || vd->vdev_aux || + !vd->vdev_ops->vdev_op_leaf) + return (resilver_needed); + + vd->vdev_resilver_deferred = B_FALSE; + + return (!vdev_is_dead(vd) && !vd->vdev_offline && + vdev_resilver_needed(vd, NULL, NULL)); } /* diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 1361a8b53ea..ae15cd22178 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -796,6 +796,10 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', 'reservation_022_pos'] tags = ['functional', 'reservation'] +[tests/functional/resilver] +tests = ['resilver_restart_001'] +tags = ['functional', 'resilver'] + [tests/functional/rootpool] tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos'] tags = ['functional', 'rootpool'] diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index ac0ba7cf3d1..a1fe06c165f 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -59,6 +59,7 @@ SUBDIRS = \ rename_dirs \ replacement \ reservation \ + resilver \ rootpool \ rsend \ scrub_mirror \ diff --git a/tests/zfs-tests/tests/functional/resilver/Makefile.am 
b/tests/zfs-tests/tests/functional/resilver/Makefile.am new file mode 100644 index 00000000000..465d8f3a3a3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/resilver +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + resilver_restart_001.ksh + +dist_pkgdata_DATA = \ + resilver.cfg diff --git a/tests/zfs-tests/tests/functional/resilver/cleanup.ksh b/tests/zfs-tests/tests/functional/resilver/cleanup.ksh new file mode 100755 index 00000000000..4dfa8142451 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/cleanup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/resilver/resilver.cfg + +verify_runnable "global" + +log_pass diff --git a/tests/zfs-tests/tests/functional/resilver/resilver.cfg b/tests/zfs-tests/tests/functional/resilver/resilver.cfg new file mode 100644 index 00000000000..88dfd24aed2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/resilver.cfg @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4} +SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1 + +VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 )) diff --git a/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh new file mode 100755 index 00000000000..ad0e1961e4b --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh @@ -0,0 +1,185 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +# +# DESCRIPTION: +# Testing resilver restart logic both with and without the deferred resilver +# feature enabled, verifying that resilver is not restarted when it is +# unecessary. +# +# STRATEGY: +# 1. Create a pool +# 2. Create four filesystems with the primary cache disable to force reads +# 3. Write four files simultaneously, one to each filesystem +# 4. Do with and without deferred resilvers enabled +# a. Replace a vdev with a spare & suspend resilver immediately +# b. Verify resilver starts properly +# c. Offline / online another vdev to introduce a new DTL range +# d. Verify resilver restart restart or defer +# e. Inject read errors on vdev that was offlined / onlned +# f. Verify that resilver did not restart +# g. Unsuspend resilver and wait for it to finish +# h. 
Verify that there are two resilvers and nothing is deferred +# + +function cleanup +{ + echo $ORIG_RESILVER_MIN_TIME > $ZFS_PARAMS/zfs_resilver_min_time_ms + echo $ORIG_SCAN_SUSPEND_PROGRESS > $ZFS_PARAMS/zfs_scan_suspend_progress + log_must zinject -c all + destroy_pool $TESTPOOL + rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE +} + +# Count resilver events in zpool and number of deferred rsilvers on vdevs +function verify_restarts # +{ + msg=$1 + cnt=$2 + defer=$3 + + # check the number of resilver start in events log + RESILVERS=$(zpool events | grep -c sysevent.fs.zfs.resilver_start) + log_note "expected $cnt resilver start(s)$msg, found $RESILVERS" + [[ "$RESILVERS" -ne "$cnt" ]] && + log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS" + + [[ -z "$defer" ]] && return + + # use zdb to find which vdevs have the resilver defer flag + VDEV_DEFERS=$(zdb -C $TESTPOOL | \ + sed -n -e '/^ *children\[[0-9]\].*$/{h}' \ + -e '/ *com.datto:resilver_defer$/{g;p}') + + if [[ "$defer" == "-" ]] + then + [[ -n $VDEV_DEFERS ]] && + log_fail "didn't expect any vdevs to have resilver deferred" + return + fi + + [[ "x${VDEV_DEFERS}x" =~ "x +children[$defer]:x" ]] || + log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS" +} + +log_assert "Check for unnecessary resilver restarts" + +ZFS_PARAMS=/sys/module/zfs/parameters +ORIG_RESILVER_MIN_TIME=$(cat $ZFS_PARAMS/zfs_resilver_min_time_ms) +ORIG_SCAN_SUSPEND_PROGRESS=$(cat $ZFS_PARAMS/zfs_scan_suspend_progress) + +set -A RESTARTS -- '1' '2' '2' '2' +set -A VDEVS -- '' '' '' '' +set -A DEFER_RESTARTS -- '1' '1' '1' '2' +set -A DEFER_VDEVS -- '-' '2' '2' '-' + +VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" + +log_onexit cleanup + +log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE + +log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \ + raidz ${VDEV_FILES[@]} + +# Create 4 filesystems +for fs in fs{0..3} +do + log_must zfs create -o primarycache=none -o recordsize=1k 
$TESTPOOL/$fs +done + +# simultaneously write 16M to each of them +set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0 +log_note "Writing data files" +for path in ${DATAPATHS[@]} +do + dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 & +done +wait + +# Test without and with deferred resilve feature enabled +for test in "without" "with" +do + log_note "Testing $test deferred resilvers" + + if [[ $test == "with" ]] + then + log_must zpool set feature@resilver_defer=enabled $TESTPOOL + RESTARTS=( "${DEFER_RESTARTS[@]}" ) + VDEVS=( "${DEFER_VDEVS[@]}" ) + VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}" + fi + + # clear the events + log_must zpool events -c + + # limit scanning time + echo 50 > $ZFS_PARAMS/zfs_resilver_min_time_ms + + # initiate a resilver and suspend the scan as soon as possible + log_must zpool replace $TESTPOOL $VDEV_REPLACE + echo 1 > $ZFS_PARAMS/zfs_scan_suspend_progress + + # there should only be 1 resilver start + verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}" + + # offline then online a vdev to introduce a new DTL range after current + # scan, which should restart (or defer) the resilver + log_must zpool offline $TESTPOOL ${VDEV_FILES[2]} + log_must zpool sync $TESTPOOL + log_must zpool online $TESTPOOL ${VDEV_FILES[2]} + log_must zpool sync $TESTPOOL + + # there should now be 2 resilver starts w/o defer, 1 with defer + verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}" + + # inject read io errors on vdev and verify resilver does not restart + log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL + log_must cat ${DATAPATHS[1]} > /dev/null + log_must zinject -c all + + # there should still be 2 resilver starts w/o defer, 1 with defer + verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}" + + # unsuspend resilver + echo 0 > $ZFS_PARAMS/zfs_scan_suspend_progress + echo 3000 > $ZFS_PARAMS/zfs_resilver_min_time_ms + + # wait for resilver to finish + for iter in {0..59} + do + is_pool_resilvered 
$TESTPOOL && break + sleep 1 + done + is_pool_resilvered $TESTPOOL || + log_fail "resilver timed out" + + # wait for a few txg's to see if a resilver happens + log_must zpool sync $TESTPOOL + + # there should now be 2 resilver starts + verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}" +done + +log_pass "Resilver did not restart unnecessarily" diff --git a/tests/zfs-tests/tests/functional/resilver/setup.ksh b/tests/zfs-tests/tests/functional/resilver/setup.ksh new file mode 100755 index 00000000000..4dfa8142451 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/setup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +verify_runnable "global" + +log_pass From 504aae708e1dc5d949e2a0434deeec374a6abfe5 Mon Sep 17 00:00:00 2001 From: John Poduska Date: Tue, 10 Dec 2019 12:10:36 -0500 Subject: [PATCH 321/325] ZTS: Fixes for spurious failures of resilver_restart_001 test The resilver restart test was reported as failing about 2% of the time. 
Two issues were found: - The event log wasn't large enough, so resilver events were missing - One 'zpool sync' wasn't enough for resilver to start after zinject Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Reviewed-by: Kjeld Schouten Signed-off-by: John Poduska Issue #9588 Closes #9677 Closes #9703 --- .../resilver/resilver_restart_001.ksh | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh index ad0e1961e4b..876b28690c1 100755 --- a/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh +++ b/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh @@ -45,14 +45,16 @@ function cleanup { - echo $ORIG_RESILVER_MIN_TIME > $ZFS_PARAMS/zfs_resilver_min_time_ms - echo $ORIG_SCAN_SUSPEND_PROGRESS > $ZFS_PARAMS/zfs_scan_suspend_progress + log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME + log_must set_tunable32 zfs_scan_suspend_progress \ + $ORIG_SCAN_SUSPEND_PROGRESS + log_must set_tunable32 zfs_zevent_len_max $ORIG_ZFS_ZEVENT_LEN_MAX log_must zinject -c all destroy_pool $TESTPOOL rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE } -# Count resilver events in zpool and number of deferred rsilvers on vdevs +# count resilver events in zpool and number of deferred rsilvers on vdevs function verify_restarts # { msg=$1 @@ -85,9 +87,9 @@ function verify_restarts # log_assert "Check for unnecessary resilver restarts" -ZFS_PARAMS=/sys/module/zfs/parameters -ORIG_RESILVER_MIN_TIME=$(cat $ZFS_PARAMS/zfs_resilver_min_time_ms) -ORIG_SCAN_SUSPEND_PROGRESS=$(cat $ZFS_PARAMS/zfs_scan_suspend_progress) +ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms) +ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress) +ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable zfs_zevent_len_max) set -A RESTARTS -- '1' '2' '2' '2' set -A VDEVS -- '' '' '' '' @@ -98,12 +100,15 @@ 
VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" log_onexit cleanup +# ensure that enough events will be saved +log_must set_tunable32 zfs_zevent_len_max 512 + log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \ raidz ${VDEV_FILES[@]} -# Create 4 filesystems +# create 4 filesystems for fs in fs{0..3} do log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs @@ -118,7 +123,7 @@ do done wait -# Test without and with deferred resilve feature enabled +# test without and with deferred resilve feature enabled for test in "without" "with" do log_note "Testing $test deferred resilvers" @@ -135,11 +140,11 @@ do log_must zpool events -c # limit scanning time - echo 50 > $ZFS_PARAMS/zfs_resilver_min_time_ms + log_must set_tunable32 zfs_resilver_min_time_ms 50 # initiate a resilver and suspend the scan as soon as possible log_must zpool replace $TESTPOOL $VDEV_REPLACE - echo 1 > $ZFS_PARAMS/zfs_scan_suspend_progress + log_must set_tunable32 zfs_scan_suspend_progress 1 # there should only be 1 resilver start verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}" @@ -163,8 +168,8 @@ do verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}" # unsuspend resilver - echo 0 > $ZFS_PARAMS/zfs_scan_suspend_progress - echo 3000 > $ZFS_PARAMS/zfs_resilver_min_time_ms + log_must set_tunable32 zfs_scan_suspend_progress 0 + log_must set_tunable32 zfs_resilver_min_time_ms 3000 # wait for resilver to finish for iter in {0..59} @@ -177,6 +182,7 @@ do # wait for a few txg's to see if a resilver happens log_must zpool sync $TESTPOOL + log_must zpool sync $TESTPOOL # there should now be 2 resilver starts verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}" From 9e36832d31b104d229eb8f979d57e6cf43fc0895 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Wed, 22 Jan 2020 10:00:42 -0800 Subject: [PATCH 322/325] Fix zfs-0.8.3 "qat.h" This applies the patch from: 
https://github.com/zfsonlinux/zfs/issues/9476#issuecomment-543854498 ...which was originally from: 9fa8b5b QAT related bug fixes This allows QAT to build. Signed-off-by: Tony Hutter --- module/zfs/qat.c | 2 +- module/zfs/qat_compress.c | 2 +- module/zfs/qat_crypt.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/module/zfs/qat.c b/module/zfs/qat.c index 08613b3a204..a6f024cb44d 100644 --- a/module/zfs/qat.c +++ b/module/zfs/qat.c @@ -21,7 +21,7 @@ #if defined(_KERNEL) && defined(HAVE_QAT) #include -#include +#include "qat.h" qat_stats_t qat_stats = { { "comp_requests", KSTAT_DATA_UINT64 }, diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index 011358329f4..16649d60f66 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include "qat.h" /* * Max instances in a QAT device, each instance is a channel to submit diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c index 1e77f143e3e..ec9f085cffa 100644 --- a/module/zfs/qat_crypt.c +++ b/module/zfs/qat_crypt.c @@ -36,7 +36,7 @@ #include #include "lac/cpa_cy_im.h" #include "lac/cpa_cy_common.h" -#include +#include "qat.h" /* * Max instances in a QAT device, each instance is a channel to submit From 9bb3d57b03e6916a2d38574420a2934b8827b3fb Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 27 Dec 2019 10:32:18 -0800 Subject: [PATCH 323/325] Tag zfs-0.8.3 META file and changelog updated. 
Signed-off-by: Tony Hutter --- META | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/META b/META index 8d2d6e8abb5..4871ede9cf7 100644 --- a/META +++ b/META @@ -1,7 +1,7 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 0.8.2 +Version: 0.8.3 Release: 1 Release-Tags: relext License: CDDL From 64f2567becb48bf7e430461de951180950b4f407 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 3 Apr 2018 14:04:21 -0700 Subject: [PATCH 324/325] [WIP] raidz expansion, alpha preview 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a alpha-quality preview of RAID-Z expansion. This feature allows disks to be added one at a time to a RAID-Z group, expanding its capacity incrementally. This feature is especially useful for small pools (typically with only one RAID-Z group), where there isn't sufficient hardware to add capacity by adding a whole new RAID-Z group (typically doubling the number of disks). For additional context as well as a design overview, see my short talk from the 2017 OpenZFS Developer Summit: [slides](http://www.open-zfs.org/w/images/6/68/RAIDZ_Expansion_v2.pdf) [video](https://www.youtube.com/watch?v=ZF8V7Tc9G28) Functionality that's currently implemented: * Can expand raidz device with `zpool attach poolname raidz2-0 newdisk` * Simple test script in `scripts/raidz_expand_test.sh` * During reflow/expansion: * All allocated space in device is rewritten (copied to its new location in the RAIDZ vdev) * Reflow happens in background over multiple txg’s * Reads and writes during reflow are handled * Can reboot or export/import, resumes after import (with exception if at the very beginning of reflow) * Progress is reported in zpool status * After expansion completes: * Can initiate additional expansions * Additional space available * Device failure and silent damage are handled * Can reboot or export/import * Status (completion time) reported in zpool status Functionality that still needs to be 
implemented: * Add on-disk feature flag * Progress should be reported in terms of offset on disk, not bytes copied * Logical stripe width does not increase * Crash in the very beginning of reflow can trash the pool * Pool must be healthy during reflow * Does not use SIMD instructions for raidz math * Documentation * Automated tests! This feature should only be used on test pools. The pool will eventually need to be **DESTROYED**, because the on-disk format will not be compatible with the final release. Additionally, there are currently bugs in RAID-Z expansion which can occasionally cause data loss. I would especially appreciate if anyone has time to write some automated tests for RAID-Z expansion in the ZFS Test Suite (including converting the raidz_expand_test.sh script into a proper test). Sponsored by: The FreeBSD Foundation --- cmd/Makefile.am | 2 +- cmd/zpool/zpool_main.c | 97 + cmd/ztest/ztest.c | 32 +- include/sys/fs/zfs.h | 19 + include/sys/spa_impl.h | 4 + include/sys/vdev_raidz.h | 47 + include/sys/vdev_raidz_impl.h | 52 +- lib/libzfs/libzfs_pool.c | 22 + lib/libzpool/Makefile.am | 9 - module/zfs/Makefile.in | 16 - module/zfs/abd.c | 6 + module/zfs/spa.c | 9415 +++++++++++++++-------------- module/zfs/spa_misc.c | 4 +- module/zfs/vdev.c | 56 +- module/zfs/vdev_label.c | 34 +- module/zfs/vdev_raidz.c | 2723 ++++++--- module/zfs/vdev_raidz_math_impl.h | 1 + scripts/raidz_expand_test.sh | 139 + 18 files changed, 7156 insertions(+), 5522 deletions(-) create mode 100755 scripts/raidz_expand_test.sh diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 88609e455f2..f1ade0390b5 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,5 +1,5 @@ SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest -SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid +SUBDIRS += fsck_zfs vdev_id zgenhostid if USING_PYTHON SUBDIRS += arcstat arc_summary dbufstat diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 3b7aaa0e73a..4859d2cd7da 100644 --- 
a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -7200,6 +7200,99 @@ print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs) } } +/* + * Print out detailed raidz expansion status. + */ +static void +print_raidz_expand_status(zpool_handle_t *zhp, pool_raidz_expand_stat_t *pres) +{ + char copied_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; + time_t start, end; + nvlist_t *config, *nvroot; + nvlist_t **child; + uint_t children; + + if (pres == NULL || pres->pres_state == DSS_NONE) + return; + + /* + * Determine name of vdev. + */ + config = zpool_get_config(zhp, NULL); + nvroot = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE); + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + assert(pres->pres_expanding_vdev < children); + + (void) printf(gettext("raidz expand: ")); + + start = pres->pres_start_time; + end = pres->pres_end_time; + zfs_nicenum(pres->pres_reflowed, copied_buf, sizeof (copied_buf)); + + /* + * Expansion is finished or canceled. + */ + if (pres->pres_state == DSS_FINISHED) { + uint64_t minutes_taken = (end - start) / 60; + + (void) printf(gettext("Expansion of vdev %u copied %s " + "in %lluh%um, completed on %s"), + (int)pres->pres_expanding_vdev, + copied_buf, + (u_longlong_t)(minutes_taken / 60), + (uint_t)(minutes_taken % 60), + ctime((time_t *)&end)); + } else if (pres->pres_state == DSS_CANCELED) { + (void) printf(gettext("Expansion of vdev %u canceled on %s"), + (int)pres->pres_expanding_vdev, ctime(&end)); + } else { + uint64_t copied, total, elapsed, mins_left, hours_left; + double fraction_done; + uint_t rate; + + assert(pres->pres_state == DSS_SCANNING); + + /* + * Expansion is in progress. + */ + (void) printf(gettext( + "Expansion of vdev %u in progress since %s"), + (int)pres->pres_expanding_vdev, ctime(&start)); + + copied = pres->pres_reflowed > 0 ? 
pres->pres_reflowed : 1; + total = pres->pres_to_reflow; + fraction_done = (double)copied / total; + + /* elapsed time for this pass */ + elapsed = time(NULL) - pres->pres_start_time; + elapsed = elapsed > 0 ? elapsed : 1; + rate = copied / elapsed; + rate = rate > 0 ? rate : 1; + mins_left = ((total - copied) / rate) / 60; + hours_left = mins_left / 60; + + zfs_nicenum(copied, examined_buf, sizeof (examined_buf)); + zfs_nicenum(total, total_buf, sizeof (total_buf)); + zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); + + /* + * do not print estimated time if hours_left is more than + * 30 days + */ + (void) printf(gettext(" %s copied out of %s at %s/s, " + "%.2f%% done"), + examined_buf, total_buf, rate_buf, 100 * fraction_done); + if (hours_left < (30 * 24)) { + (void) printf(gettext(", %lluh%um to go\n"), + (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); + } else { + (void) printf(gettext( + ", (copy is slow, no estimated time)\n")); + } + } +} static void print_checkpoint_status(pool_checkpoint_stat_t *pcs) { @@ -7676,6 +7769,7 @@ status_callback(zpool_handle_t *zhp, void *data) pool_checkpoint_stat_t *pcs = NULL; pool_scan_stat_t *ps = NULL; pool_removal_stat_t *prs = NULL; + pool_raidz_expand_stat_t *pres = NULL; (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); @@ -7683,11 +7777,14 @@ status_callback(zpool_handle_t *zhp, void *data) ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_RAIDZ_EXPAND_STATS, (uint64_t **)&pres, &c); print_scan_status(ps); print_checkpoint_scan_warning(ps, pcs); print_removal_status(zhp, prs); print_checkpoint_status(pcs); + print_raidz_expand_status(zhp, pres); cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0, cbp->cb_name_flags | VDEV_NAME_TYPE_ID); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 8fe412672ff..082ac4b5fe7 
100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -268,9 +268,9 @@ typedef struct bufwad { * still need to map from object ID to rangelock_t. */ typedef enum { - RL_READER, - RL_WRITER, - RL_APPEND + ZTRL_READER, + ZTRL_WRITER, + ZTRL_APPEND } rl_type_t; typedef struct rll { @@ -1381,7 +1381,7 @@ ztest_rll_lock(rll_t *rll, rl_type_t type) { mutex_enter(&rll->rll_lock); - if (type == RL_READER) { + if (type == ZTRL_READER) { while (rll->rll_writer != NULL) (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_readers++; @@ -1862,7 +1862,7 @@ ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap) zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); ASSERT(object != 0); - ztest_object_lock(zd, object, RL_WRITER); + ztest_object_lock(zd, object, ZTRL_WRITER); VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); @@ -1932,8 +1932,8 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) if (bt->bt_magic != BT_MAGIC) bt = NULL; - ztest_object_lock(zd, lr->lr_foid, RL_READER); - rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); + ztest_object_lock(zd, lr->lr_foid, ZTRL_READER); + rl = ztest_range_lock(zd, lr->lr_foid, offset, length, ZTRL_WRITER); VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); @@ -2035,9 +2035,9 @@ ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - ztest_object_lock(zd, lr->lr_foid, RL_READER); + ztest_object_lock(zd, lr->lr_foid, ZTRL_READER); rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, - RL_WRITER); + ZTRL_WRITER); tx = dmu_tx_create(os); @@ -2077,7 +2077,7 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - ztest_object_lock(zd, lr->lr_foid, RL_WRITER); + ztest_object_lock(zd, lr->lr_foid, ZTRL_WRITER); VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); @@ -2201,7 +2201,7 @@ ztest_get_data(void *arg, 
lr_write_t *lr, char *buf, struct lwb *lwb, ASSERT3P(zio, !=, NULL); ASSERT3U(size, !=, 0); - ztest_object_lock(zd, object, RL_READER); + ztest_object_lock(zd, object, ZTRL_READER); error = dmu_bonus_hold(os, object, FTAG, &db); if (error) { ztest_object_unlock(zd, object); @@ -2226,7 +2226,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, if (buf != NULL) { /* immediate write */ zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd, - object, offset, size, RL_READER); + object, offset, size, ZTRL_READER); error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); @@ -2241,7 +2241,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, } zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd, - object, offset, size, RL_READER); + object, offset, size, ZTRL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); @@ -2318,7 +2318,7 @@ ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) ASSERT(od->od_object != 0); ASSERT(missing == 0); /* there should be no gaps */ - ztest_object_lock(zd, od->od_object, RL_READER); + ztest_object_lock(zd, od->od_object, ZTRL_READER); VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, od->od_object, FTAG, &db)); dmu_object_info_from_db(db, &doi); @@ -2491,8 +2491,8 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) txg_wait_synced(dmu_objset_pool(os), 0); - ztest_object_lock(zd, object, RL_READER); - rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); + ztest_object_lock(zd, object, ZTRL_READER); + rl = ztest_range_lock(zd, object, offset, size, ZTRL_WRITER); tx = dmu_tx_create(os); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 6b780724245..459e1a1eee3 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -619,6 +619,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ #define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored 
on disk */ #define ZPOOL_CONFIG_CHECKPOINT_STATS "checkpoint_stats" /* not on disk */ +#define ZPOOL_CONFIG_RAIDZ_EXPAND_STATS "raidz_expand_stats" /* not on disk */ #define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ #define ZPOOL_CONFIG_INDIRECT_SIZE "indirect_size" /* not stored on disk */ @@ -679,6 +680,8 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" +#define ZPOOL_CONFIG_RAIDZ_LOGICAL_WIDTH "raidz_logical_width" +#define ZPOOL_CONFIG_RAIDZ_EXPAND_OFFSET "raidz_expand_offset" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_LOADED_TIME "initial_load_time" @@ -773,6 +776,13 @@ typedef struct zpool_load_policy { #define VDEV_TOP_ZAP_ALLOCATION_BIAS \ "org.zfsonlinux:allocation_bias" +#define VDEV_TOP_ZAP_RAIDZ_EXPAND_STATE \ + "org.freebsd:raidz_expand_state" +#define VDEV_TOP_ZAP_RAIDZ_EXPAND_START_TIME \ + "org.freebsd:raidz_expand_start_time" +#define VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME \ + "org.freebsd:raidz_expand_end_time" + /* vdev metaslab allocation bias */ #define VDEV_ALLOC_BIAS_LOG "log" #define VDEV_ALLOC_BIAS_SPECIAL "special" @@ -975,6 +985,15 @@ typedef struct pool_removal_stat { uint64_t prs_mapping_memory; } pool_removal_stat_t; +typedef struct pool_raidz_expand_stat { + uint64_t pres_state; /* dsl_scan_state_t */ + uint64_t pres_expanding_vdev; + uint64_t pres_start_time; + uint64_t pres_end_time; + uint64_t pres_to_reflow; /* bytes that need to be moved */ + uint64_t pres_reflowed; /* bytes moved so far */ +} pool_raidz_expand_stat_t; + typedef enum dsl_scan_state { DSS_NONE, DSS_SCANNING, diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 659c69738fa..71bf5f69ab8 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -306,6 +307,9 @@ struct spa { 
spa_condensing_indirect_t *spa_condensing_indirect; zthr_t *spa_condense_zthr; /* zthr doing condense. */ + vdev_raidz_expand_t *spa_raidz_expand; + zthr_t *spa_raidz_expand_zthr; + uint64_t spa_checkpoint_txg; /* the txg of the checkpoint */ spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */ zthr_t *spa_checkpoint_discard_zthr; diff --git a/include/sys/vdev_raidz.h b/include/sys/vdev_raidz.h index 0ce2b5ea1d6..845939329b6 100644 --- a/include/sys/vdev_raidz.h +++ b/include/sys/vdev_raidz.h @@ -26,6 +26,7 @@ #define _SYS_VDEV_RAIDZ_H #include +#include #ifdef __cplusplus extern "C" { @@ -57,6 +58,52 @@ int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *, const int); int vdev_raidz_impl_set(const char *); +typedef struct vdev_raidz_expand { + uint64_t vre_vdev_id; + + kmutex_t vre_lock; + kcondvar_t vre_cv; + + /* + * How much i/o is outstanding (issued and not completed). + */ + uint64_t vre_outstanding_bytes; + + /* + * Next offset to issue i/o for. + */ + uint64_t vre_offset; + + /* + * Next offset to issue i/o for which has been synced to disk. 
+ */ + uint64_t vre_offset_phys; + + uint64_t vre_offset_pertxg[TXG_SIZE]; + + dsl_scan_state_t vre_state; + time_t vre_start_time; + time_t vre_end_time; + + rangelock_t vre_rangelock; +} vdev_raidz_expand_t; + +typedef struct vdev_raidz { + int vd_logical_width; + int vd_physical_width; + int vd_nparity; + /* + * If this vdev is being expanded, spa_raidz_expand is set to this + */ + vdev_raidz_expand_t vn_vre; +} vdev_raidz_t; + +extern void vdev_raidz_attach_sync(void *, dmu_tx_t *); +extern void vdev_raidz_config_generate(vdev_t *, nvlist_t *); +extern void *vdev_raidz_get_tsd(spa_t *, nvlist_t *); +extern void spa_start_raidz_expansion_thread(spa_t *); +extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *); +extern int vdev_raidz_load(vdev_t *); #ifdef __cplusplus } #endif diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h index 2e38962cc31..bcde2adb386 100644 --- a/include/sys/vdev_raidz_impl.h +++ b/include/sys/vdev_raidz_impl.h @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -101,17 +102,7 @@ typedef struct raidz_impl_ops { char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */ } raidz_impl_ops_t; -typedef struct raidz_col { - uint64_t rc_devidx; /* child device index for I/O */ - uint64_t rc_offset; /* device offset */ - uint64_t rc_size; /* I/O size */ - abd_t *rc_abd; /* I/O data */ - void *rc_gdata; /* used to store the "good" version */ - int rc_error; /* I/O error for this device */ - uint8_t rc_tried; /* Did we attempt this I/O column? */ - uint8_t rc_skipped; /* Did we skip this I/O column? 
*/ -} raidz_col_t; - +#if 0 typedef struct raidz_map { uint64_t rm_cols; /* Regular column count */ uint64_t rm_scols; /* Count including skipped columns */ @@ -129,6 +120,42 @@ typedef struct raidz_map { const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ } raidz_map_t; +#endif + +typedef struct raidz_col { + uint64_t rc_devidx; /* child device index for I/O */ + uint64_t rc_offset; /* device offset */ + uint64_t rc_size; /* I/O size */ + abd_t *rc_abd; /* I/O data */ + void *rc_orig_data; /* pre-reconstruction */ + abd_t *rc_gdata; /* used to store the "good" version */ + int rc_error; /* I/O error for this device */ + uint8_t rc_tried; /* Did we attempt this I/O column? */ + uint8_t rc_skipped; /* Did we skip this I/O column? */ + uint8_t rc_need_orig_restore; /* need to restore from orig_data? */ +} raidz_col_t; + +typedef struct raidz_row { + uint64_t rr_cols; /* Regular column count */ + uint64_t rr_missingdata; /* Count of missing data devices */ + uint64_t rr_missingparity; /* Count of missing parity devices */ + uint64_t rr_firstdatacol; /* First data column/parity count */ + abd_t *rr_abd_copy; /* rm_asize-buffer of copied data */ + int rr_code; /* reconstruction code */ + raidz_col_t rr_col[0]; /* Flexible array of I/O columns */ +} raidz_row_t; + +typedef struct raidz_map { + uintptr_t rm_reports; /* # of referencing checksum reports */ + boolean_t rm_freed; /* map no longer has referencing ZIO */ + boolean_t rm_ecksuminjected; /* checksum error was injected */ + int rm_nrows; + int rm_nskip; /* Sectors skipped for padding */ + locked_range_t *rm_lr; + raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ + raidz_row_t *rm_row[0]; /* flexible array of rows */ +} raidz_map_t; + #define RAIDZ_ORIGINAL_IMPL (INT_MAX) @@ -158,6 +185,7 @@ extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl; * * raidz_parity Returns parity of the RAIDZ block * raidz_ncols Returns number of columns the 
block spans + * Note, all rows have the same number of columns. * raidz_nbigcols Returns number of big columns * raidz_col_p Returns pointer to a column * raidz_col_size Returns size of a column @@ -165,7 +193,7 @@ extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl; * raidz_short_size Returns size of short columns */ #define raidz_parity(rm) ((rm)->rm_firstdatacol) -#define raidz_ncols(rm) ((rm)->rm_cols) +#define raidz_ncols(rm) ((rm)->rm_row[0]->rr_cols) #define raidz_nbigcols(rm) ((rm)->rm_bigcols) #define raidz_col_p(rm, c) ((rm)->rm_col + (c)) #define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size) diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index d45b87ce652..3ef65f2684b 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3131,6 +3131,28 @@ zpool_vdev_attach(zpool_handle_t *zhp, verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); zc.zc_cookie = replacing; + char *typestr; + if (nvlist_lookup_string(tgt, ZPOOL_CONFIG_TYPE, &typestr) == 0 && + strcmp(typestr, "raidz") == 0) { + printf( + " *****************************************************\n" + " * Thank you for testing this alpha-quality release *\n" + " * of RAID-Z expansion. This feature should only *\n" + " * be used on test pools. The pool will eventually *\n" + " * need to be DESTROYED, because the on-disk format *\n" + " * will not be compatible with the final release. *\n" + " * Additionally, there are currently bugs in RAID-Z *\n" + " * expansion which can occasionally cause data loss. *\n" + " * Please report bugs to mahrens@delphix.com. 
*\n" + " *****************************************************\n"); + for (int i = 5; i > 0; i--) { + printf("\nYou have %u seconds to abort by " + "pressing ^C (control-C)\n", i); + sleep(1); + } + + } + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0 || children != 1) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 91f47503a3b..bb28c8d0c70 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -119,15 +119,6 @@ KERNEL_C = \ vdev_missing.c \ vdev_queue.c \ vdev_raidz.c \ - vdev_raidz_math_aarch64_neon.c \ - vdev_raidz_math_aarch64_neonx2.c \ - vdev_raidz_math_avx2.c \ - vdev_raidz_math_avx512bw.c \ - vdev_raidz_math_avx512f.c \ - vdev_raidz_math.c \ - vdev_raidz_math_scalar.c \ - vdev_raidz_math_sse2.c \ - vdev_raidz_math_ssse3.c \ vdev_removal.c \ vdev_root.c \ vdev_trim.c \ diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index b2460f0d657..44f9ef0f9a1 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -95,8 +95,6 @@ $(MODULE)-objs += vdev_mirror.o $(MODULE)-objs += vdev_missing.o $(MODULE)-objs += vdev_queue.o $(MODULE)-objs += vdev_raidz.o -$(MODULE)-objs += vdev_raidz_math.o -$(MODULE)-objs += vdev_raidz_math_scalar.o $(MODULE)-objs += vdev_removal.o $(MODULE)-objs += vdev_root.o $(MODULE)-objs += vdev_trim.o @@ -148,17 +146,3 @@ $(MODULE)-objs += dsl_userhold.o $(MODULE)-objs += qat.o $(MODULE)-objs += qat_compress.o $(MODULE)-objs += qat_crypt.o - -# Suppress incorrect warnings from versions of objtool which are not -# aware of x86 EVEX prefix instructions used for AVX512. 
-OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y -OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y - -$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_sse2.o -$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_ssse3.o -$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx2.o -$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512f.o -$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512bw.o - -$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o -$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 8b2514404a8..5522620d2f9 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -1162,6 +1162,9 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size, int ret = 0; struct abd_iter aiter; + if (size == 0) + return (ret); + abd_verify(abd); ASSERT3U(off + size, <=, abd->abd_size); @@ -1290,6 +1293,9 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, int ret = 0; struct abd_iter daiter, saiter; + if (size == 0) + return (ret); + abd_verify(dabd); abd_verify(sabd); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index cf8462cec33..a5a9c763d1a 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1,46 +1,46 @@ /* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ +* CDDL HEADER START +* +* The contents of this file are subject to the terms of the +* Common Development and Distribution License (the "License"). +* You may not use this file except in compliance with the License. +* +* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +* or http://www.opensolaris.org/os/licensing. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* When distributing Covered Code, include this CDDL HEADER in each +* file and include the License file at usr/src/OPENSOLARIS.LICENSE. +* If applicable, add the following below this CDDL HEADER, with the +* fields enclosed by brackets "[]" replaced with your own identifying +* information: Portions Copyright [yyyy] [name of copyright owner] +* +* CDDL HEADER END +*/ /* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2019 by Delphix. All rights reserved. - * Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. - * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2016 Toomas Soome - * Copyright (c) 2016 Actifio, Inc. All rights reserved. - * Copyright 2018 Joyent, Inc. - * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. - * Copyright 2017 Joyent, Inc. - * Copyright (c) 2017, Intel Corporation. - */ +* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +* Copyright (c) 2011, 2019 by Delphix. All rights reserved. +* Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. +* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 
+* Copyright 2013 Saso Kiselkov. All rights reserved. +* Copyright (c) 2014 Integros [integros.com] +* Copyright 2016 Toomas Soome +* Copyright (c) 2016 Actifio, Inc. All rights reserved. +* Copyright 2018 Joyent, Inc. +* Copyright (c) 2017, 2019, Datto Inc. All rights reserved. +* Copyright 2017 Joyent, Inc. +* Copyright (c) 2017, Intel Corporation. +*/ /* - * SPA: Storage Pool Allocator - * - * This file contains all the routines used when modifying on-disk SPA state. - * This includes opening, importing, destroying, exporting a pool, and syncing a - * pool. - */ +* SPA: Storage Pool Allocator +* +* This file contains all the routines used when modifying on-disk SPA state. +* This includes opening, importing, destroying, exporting a pool, and syncing a +* pool. +*/ #include #include @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -97,16 +98,16 @@ #include "zfs_comutil.h" /* - * The interval, in seconds, at which failed configuration cache file writes - * should be retried. - */ +* The interval, in seconds, at which failed configuration cache file writes +* should be retried. 
+*/ int zfs_ccw_retry_interval = 300; typedef enum zti_modes { - ZTI_MODE_FIXED, /* value is # of threads (min 1) */ - ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ - ZTI_MODE_NULL, /* don't create a taskq */ - ZTI_NMODES +ZTI_MODE_FIXED, /* value is # of threads (min 1) */ +ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ +ZTI_MODE_NULL, /* don't create a taskq */ +ZTI_NMODES } zti_modes_t; #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } @@ -118,41 +119,41 @@ typedef enum zti_modes { #define ZTI_ONE ZTI_N(1) typedef struct zio_taskq_info { - zti_modes_t zti_mode; - uint_t zti_value; - uint_t zti_count; +zti_modes_t zti_mode; +uint_t zti_value; +uint_t zti_count; } zio_taskq_info_t; static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { - "iss", "iss_h", "int", "int_h" +"iss", "iss_h", "int", "int_h" }; /* - * This table defines the taskq settings for each ZFS I/O type. When - * initializing a pool, we use this table to create an appropriately sized - * taskq. Some operations are low volume and therefore have a small, static - * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE - * macros. Other operations process a large amount of data; the ZTI_BATCH - * macro causes us to create a taskq oriented for throughput. Some operations - * are so high frequency and short-lived that the taskq itself can become a - * point of lock contention. The ZTI_P(#, #) macro indicates that we need an - * additional degree of parallelism specified by the number of threads per- - * taskq and the number of taskqs; when dispatching an event in this case, the - * particular taskq is chosen at random. - * - * The different taskq priorities are to handle the different contexts (issue - * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that - * need to be handled with minimum delay. - */ +* This table defines the taskq settings for each ZFS I/O type. 
When +* initializing a pool, we use this table to create an appropriately sized +* taskq. Some operations are low volume and therefore have a small, static +* number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE +* macros. Other operations process a large amount of data; the ZTI_BATCH +* macro causes us to create a taskq oriented for throughput. Some operations +* are so high frequency and short-lived that the taskq itself can become a +* point of lock contention. The ZTI_P(#, #) macro indicates that we need an +* additional degree of parallelism specified by the number of threads per- +* taskq and the number of taskqs; when dispatching an event in this case, the +* particular taskq is chosen at random. +* +* The different taskq priorities are to handle the different contexts (issue +* and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that +* need to be handled with minimum delay. +*/ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { - /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ - { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ - { ZTI_BATCH, ZTI_N(5), ZTI_P(12, 8), ZTI_N(5) }, /* WRITE */ - { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ - { ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ +/* ISSUE ISSUE_HIGH INTR INTR_HIGH */ +{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ +{ ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ +{ ZTI_BATCH, ZTI_N(5), ZTI_P(12, 8), ZTI_N(5) }, /* WRITE */ +{ ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ +{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ +{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ +{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ }; static void spa_sync_version(void *arg, dmu_tx_t *tx); @@ -168,1954 +169,1962 @@ uint_t zio_taskq_basedc = 
80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ /* - * Report any spa_load_verify errors found, but do not fail spa_load. - * This is used by zdb to analyze non-idle pools. - */ +* Report any spa_load_verify errors found, but do not fail spa_load. +* This is used by zdb to analyze non-idle pools. +*/ boolean_t spa_load_verify_dryrun = B_FALSE; /* - * This (illegal) pool name is used when temporarily importing a spa_t in order - * to get the vdev stats associated with the imported devices. - */ +* This (illegal) pool name is used when temporarily importing a spa_t in order +* to get the vdev stats associated with the imported devices. +*/ #define TRYIMPORT_NAME "$import" /* - * For debugging purposes: print out vdev tree during pool import. - */ +* For debugging purposes: print out vdev tree during pool import. +*/ int spa_load_print_vdev_tree = B_FALSE; /* - * A non-zero value for zfs_max_missing_tvds means that we allow importing - * pools with missing top-level vdevs. This is strictly intended for advanced - * pool recovery cases since missing data is almost inevitable. Pools with - * missing devices can only be imported read-only for safety reasons, and their - * fail-mode will be automatically set to "continue". - * - * With 1 missing vdev we should be able to import the pool and mount all - * datasets. User data that was not modified after the missing device has been - * added should be recoverable. This means that snapshots created prior to the - * addition of that device should be completely intact. - * - * With 2 missing vdevs, some datasets may fail to mount since there are - * dataset statistics that are stored as regular metadata. Some data might be - * recoverable if those vdevs were added recently. - * - * With 3 or more missing vdevs, the pool is severely damaged and MOS entries - * may be missing entirely. Chances of data recovery are very low. 
Note that - * there are also risks of performing an inadvertent rewind as we might be - * missing all the vdevs with the latest uberblocks. - */ +* A non-zero value for zfs_max_missing_tvds means that we allow importing +* pools with missing top-level vdevs. This is strictly intended for advanced +* pool recovery cases since missing data is almost inevitable. Pools with +* missing devices can only be imported read-only for safety reasons, and their +* fail-mode will be automatically set to "continue". +* +* With 1 missing vdev we should be able to import the pool and mount all +* datasets. User data that was not modified after the missing device has been +* added should be recoverable. This means that snapshots created prior to the +* addition of that device should be completely intact. +* +* With 2 missing vdevs, some datasets may fail to mount since there are +* dataset statistics that are stored as regular metadata. Some data might be +* recoverable if those vdevs were added recently. +* +* With 3 or more missing vdevs, the pool is severely damaged and MOS entries +* may be missing entirely. Chances of data recovery are very low. Note that +* there are also risks of performing an inadvertent rewind as we might be +* missing all the vdevs with the latest uberblocks. +*/ unsigned long zfs_max_missing_tvds = 0; /* - * The parameters below are similar to zfs_max_missing_tvds but are only - * intended for a preliminary open of the pool with an untrusted config which - * might be incomplete or out-dated. - * - * We are more tolerant for pools opened from a cachefile since we could have - * an out-dated cachefile where a device removal was not registered. - * We could have set the limit arbitrarily high but in the case where devices - * are really missing we would want to return the proper error codes; we chose - * SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available - * and we get a chance to retrieve the trusted config. 
- */ +* The parameters below are similar to zfs_max_missing_tvds but are only +* intended for a preliminary open of the pool with an untrusted config which +* might be incomplete or out-dated. +* +* We are more tolerant for pools opened from a cachefile since we could have +* an out-dated cachefile where a device removal was not registered. +* We could have set the limit arbitrarily high but in the case where devices +* are really missing we would want to return the proper error codes; we chose +* SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available +* and we get a chance to retrieve the trusted config. +*/ uint64_t zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1; /* - * In the case where config was assembled by scanning device paths (/dev/dsks - * by default) we are less tolerant since all the existing devices should have - * been detected and we want spa_load to return the right error codes. - */ +* In the case where config was assembled by scanning device paths (/dev/dsks +* by default) we are less tolerant since all the existing devices should have +* been detected and we want spa_load to return the right error codes. +*/ uint64_t zfs_max_missing_tvds_scan = 0; /* - * Debugging aid that pauses spa_sync() towards the end. - */ +* Debugging aid that pauses spa_sync() towards the end. +*/ boolean_t zfs_pause_spa_sync = B_FALSE; /* - * ========================================================================== - * SPA properties routines - * ========================================================================== - */ +* ========================================================================== +* SPA properties routines +* ========================================================================== +*/ /* - * Add a (source=src, propname=propval) list to an nvlist. - */ +* Add a (source=src, propname=propval) list to an nvlist. 
+*/ static void spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, - uint64_t intval, zprop_source_t src) +uint64_t intval, zprop_source_t src) { - const char *propname = zpool_prop_to_name(prop); - nvlist_t *propval; +const char *propname = zpool_prop_to_name(prop); +nvlist_t *propval; - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); +VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); +VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); - if (strval != NULL) - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); - else - VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); +if (strval != NULL) + VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); +else + VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); - VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); - nvlist_free(propval); +VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); +nvlist_free(propval); } /* - * Get property values from the spa configuration. - */ +* Get property values from the spa configuration. 
+*/ static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { - vdev_t *rvd = spa->spa_root_vdev; - dsl_pool_t *pool = spa->spa_dsl_pool; - uint64_t size, alloc, cap, version; - const zprop_source_t src = ZPROP_SRC_NONE; - spa_config_dirent_t *dp; - metaslab_class_t *mc = spa_normal_class(spa); - - ASSERT(MUTEX_HELD(&spa->spa_props_lock)); - - if (rvd != NULL) { - alloc = metaslab_class_get_alloc(mc); - alloc += metaslab_class_get_alloc(spa_special_class(spa)); - alloc += metaslab_class_get_alloc(spa_dedup_class(spa)); - - size = metaslab_class_get_space(mc); - size += metaslab_class_get_space(spa_special_class(spa)); - size += metaslab_class_get_space(spa_dedup_class(spa)); - - spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, - size - alloc, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL, - spa->spa_checkpoint_info.sci_dspace, src); - - spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, - metaslab_class_fragmentation(mc), src); - spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, - metaslab_class_expandable_space(mc), src); - spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, - (spa_mode(spa) == FREAD), src); - - cap = (size == 0) ? 
0 : (alloc * 100 / size); - spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); - - spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, - ddt_get_pool_dedup_ratio(spa), src); - - spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, - rvd->vdev_state, src); - - version = spa_version(spa); - if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) { - spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, - version, ZPROP_SRC_DEFAULT); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, - version, ZPROP_SRC_LOCAL); - } - spa_prop_add_list(*nvp, ZPOOL_PROP_LOAD_GUID, - NULL, spa_load_guid(spa), src); +vdev_t *rvd = spa->spa_root_vdev; +dsl_pool_t *pool = spa->spa_dsl_pool; +uint64_t size, alloc, cap, version; +const zprop_source_t src = ZPROP_SRC_NONE; +spa_config_dirent_t *dp; +metaslab_class_t *mc = spa_normal_class(spa); + +ASSERT(MUTEX_HELD(&spa->spa_props_lock)); + +if (rvd != NULL) { + alloc = metaslab_class_get_alloc(mc); + alloc += metaslab_class_get_alloc(spa_special_class(spa)); + alloc += metaslab_class_get_alloc(spa_dedup_class(spa)); + + size = metaslab_class_get_space(mc); + size += metaslab_class_get_space(spa_special_class(spa)); + size += metaslab_class_get_space(spa_dedup_class(spa)); + + spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, + size - alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL, + spa->spa_checkpoint_info.sci_dspace, src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, + metaslab_class_fragmentation(mc), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, + metaslab_class_expandable_space(mc), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, + (spa_mode(spa) == FREAD), src); + + cap = (size == 0) ? 
0 : (alloc * 100 / size); + spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, + ddt_get_pool_dedup_ratio(spa), src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, + rvd->vdev_state, src); + + version = spa_version(spa); + if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, + version, ZPROP_SRC_DEFAULT); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, + version, ZPROP_SRC_LOCAL); } + spa_prop_add_list(*nvp, ZPOOL_PROP_LOAD_GUID, + NULL, spa_load_guid(spa), src); +} - if (pool != NULL) { - /* - * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, - * when opening pools before this version freedir will be NULL. - */ - if (pool->dp_free_dir != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, - dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, - src); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, - NULL, 0, src); - } +if (pool != NULL) { + /* + * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, + * when opening pools before this version freedir will be NULL. 
+ */ + if (pool->dp_free_dir != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, + dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, + src); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, + NULL, 0, src); + } - if (pool->dp_leak_dir != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, - dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, - src); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, - NULL, 0, src); - } + if (pool->dp_leak_dir != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, + dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, + src); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, + NULL, 0, src); } +} - spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); +spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); - if (spa->spa_comment != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, - 0, ZPROP_SRC_LOCAL); - } +if (spa->spa_comment != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, + 0, ZPROP_SRC_LOCAL); +} - if (spa->spa_root != NULL) - spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, - 0, ZPROP_SRC_LOCAL); +if (spa->spa_root != NULL) + spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, + 0, ZPROP_SRC_LOCAL); - if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, - MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, - SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); - } +if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, + MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); +} else { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, + SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); +} - if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, - 
DNODE_MAX_SIZE, ZPROP_SRC_NONE); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, - DNODE_MIN_SIZE, ZPROP_SRC_NONE); - } +if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, + DNODE_MAX_SIZE, ZPROP_SRC_NONE); +} else { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, + DNODE_MIN_SIZE, ZPROP_SRC_NONE); +} - if ((dp = list_head(&spa->spa_config_list)) != NULL) { - if (dp->scd_path == NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, - "none", 0, ZPROP_SRC_LOCAL); - } else if (strcmp(dp->scd_path, spa_config_path) != 0) { - spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, - dp->scd_path, 0, ZPROP_SRC_LOCAL); - } +if ((dp = list_head(&spa->spa_config_list)) != NULL) { + if (dp->scd_path == NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, + "none", 0, ZPROP_SRC_LOCAL); + } else if (strcmp(dp->scd_path, spa_config_path) != 0) { + spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, + dp->scd_path, 0, ZPROP_SRC_LOCAL); } } +} /* - * Get zpool property values. - */ +* Get zpool property values. +*/ int spa_prop_get(spa_t *spa, nvlist_t **nvp) { - objset_t *mos = spa->spa_meta_objset; - zap_cursor_t zc; - zap_attribute_t za; - int err; - - err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); - if (err) - return (err); - - mutex_enter(&spa->spa_props_lock); +objset_t *mos = spa->spa_meta_objset; +zap_cursor_t zc; +zap_attribute_t za; +int err; - /* - * Get properties from the spa config. - */ - spa_prop_get_config(spa, nvp); - - /* If no pool property object, no more prop to get. */ - if (mos == NULL || spa->spa_pool_props_object == 0) { - mutex_exit(&spa->spa_props_lock); - goto out; - } +err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); +if (err) + return (err); - /* - * Get properties from the MOS pool property object. 
- */ - for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); - (err = zap_cursor_retrieve(&zc, &za)) == 0; - zap_cursor_advance(&zc)) { - uint64_t intval = 0; - char *strval = NULL; - zprop_source_t src = ZPROP_SRC_DEFAULT; - zpool_prop_t prop; +mutex_enter(&spa->spa_props_lock); - if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL) - continue; +/* + * Get properties from the spa config. + */ +spa_prop_get_config(spa, nvp); - switch (za.za_integer_length) { - case 8: - /* integer property */ - if (za.za_first_integer != - zpool_prop_default_numeric(prop)) - src = ZPROP_SRC_LOCAL; - - if (prop == ZPOOL_PROP_BOOTFS) { - dsl_pool_t *dp; - dsl_dataset_t *ds = NULL; - - dp = spa_get_dsl(spa); - dsl_pool_config_enter(dp, FTAG); - err = dsl_dataset_hold_obj(dp, - za.za_first_integer, FTAG, &ds); - if (err != 0) { - dsl_pool_config_exit(dp, FTAG); - break; - } +/* If no pool property object, no more prop to get. */ +if (mos == NULL || spa->spa_pool_props_object == 0) { + mutex_exit(&spa->spa_props_lock); + goto out; +} - strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, - KM_SLEEP); - dsl_dataset_name(ds, strval); - dsl_dataset_rele(ds, FTAG); +/* + * Get properties from the MOS pool property object. 
+ */ +for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); + (err = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + uint64_t intval = 0; + char *strval = NULL; + zprop_source_t src = ZPROP_SRC_DEFAULT; + zpool_prop_t prop; + + if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL) + continue; + + switch (za.za_integer_length) { + case 8: + /* integer property */ + if (za.za_first_integer != + zpool_prop_default_numeric(prop)) + src = ZPROP_SRC_LOCAL; + + if (prop == ZPOOL_PROP_BOOTFS) { + dsl_pool_t *dp; + dsl_dataset_t *ds = NULL; + + dp = spa_get_dsl(spa); + dsl_pool_config_enter(dp, FTAG); + err = dsl_dataset_hold_obj(dp, + za.za_first_integer, FTAG, &ds); + if (err != 0) { dsl_pool_config_exit(dp, FTAG); - } else { - strval = NULL; - intval = za.za_first_integer; + break; } - spa_prop_add_list(*nvp, prop, strval, intval, src); + strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, + KM_SLEEP); + dsl_dataset_name(ds, strval); + dsl_dataset_rele(ds, FTAG); + dsl_pool_config_exit(dp, FTAG); + } else { + strval = NULL; + intval = za.za_first_integer; + } - if (strval != NULL) - kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); + spa_prop_add_list(*nvp, prop, strval, intval, src); - break; + if (strval != NULL) + kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); - case 1: - /* string property */ - strval = kmem_alloc(za.za_num_integers, KM_SLEEP); - err = zap_lookup(mos, spa->spa_pool_props_object, - za.za_name, 1, za.za_num_integers, strval); - if (err) { - kmem_free(strval, za.za_num_integers); - break; - } - spa_prop_add_list(*nvp, prop, strval, 0, src); - kmem_free(strval, za.za_num_integers); - break; + break; - default: + case 1: + /* string property */ + strval = kmem_alloc(za.za_num_integers, KM_SLEEP); + err = zap_lookup(mos, spa->spa_pool_props_object, + za.za_name, 1, za.za_num_integers, strval); + if (err) { + kmem_free(strval, za.za_num_integers); break; } + spa_prop_add_list(*nvp, prop, strval, 0, src); + kmem_free(strval, 
za.za_num_integers); + break; + + default: + break; } - zap_cursor_fini(&zc); - mutex_exit(&spa->spa_props_lock); +} +zap_cursor_fini(&zc); +mutex_exit(&spa->spa_props_lock); out: - if (err && err != ENOENT) { - nvlist_free(*nvp); - *nvp = NULL; - return (err); - } +if (err && err != ENOENT) { + nvlist_free(*nvp); + *nvp = NULL; + return (err); +} - return (0); +return (0); } /* - * Validate the given pool properties nvlist and modify the list - * for the property values to be set. - */ +* Validate the given pool properties nvlist and modify the list +* for the property values to be set. +*/ static int spa_prop_validate(spa_t *spa, nvlist_t *props) { - nvpair_t *elem; - int error = 0, reset_bootfs = 0; - uint64_t objnum = 0; - boolean_t has_feature = B_FALSE; - - elem = NULL; - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { - uint64_t intval; - char *strval, *slash, *check, *fname; - const char *propname = nvpair_name(elem); - zpool_prop_t prop = zpool_name_to_prop(propname); - - switch (prop) { - case ZPOOL_PROP_INVAL: - if (!zpool_prop_feature(propname)) { - error = SET_ERROR(EINVAL); - break; - } - - /* - * Sanitize the input. 
- */ - if (nvpair_type(elem) != DATA_TYPE_UINT64) { - error = SET_ERROR(EINVAL); - break; - } - - if (nvpair_value_uint64(elem, &intval) != 0) { - error = SET_ERROR(EINVAL); - break; - } - - if (intval != 0) { - error = SET_ERROR(EINVAL); - break; - } +nvpair_t *elem; +int error = 0, reset_bootfs = 0; +uint64_t objnum = 0; +boolean_t has_feature = B_FALSE; + +elem = NULL; +while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + uint64_t intval; + char *strval, *slash, *check, *fname; + const char *propname = nvpair_name(elem); + zpool_prop_t prop = zpool_name_to_prop(propname); + + switch (prop) { + case ZPOOL_PROP_INVAL: + if (!zpool_prop_feature(propname)) { + error = SET_ERROR(EINVAL); + break; + } - fname = strchr(propname, '@') + 1; - if (zfeature_lookup_name(fname, NULL) != 0) { - error = SET_ERROR(EINVAL); - break; - } + /* + * Sanitize the input. + */ + if (nvpair_type(elem) != DATA_TYPE_UINT64) { + error = SET_ERROR(EINVAL); + break; + } - has_feature = B_TRUE; + if (nvpair_value_uint64(elem, &intval) != 0) { + error = SET_ERROR(EINVAL); break; + } - case ZPOOL_PROP_VERSION: - error = nvpair_value_uint64(elem, &intval); - if (!error && - (intval < spa_version(spa) || - intval > SPA_VERSION_BEFORE_FEATURES || - has_feature)) - error = SET_ERROR(EINVAL); + if (intval != 0) { + error = SET_ERROR(EINVAL); break; + } - case ZPOOL_PROP_DELEGATION: - case ZPOOL_PROP_AUTOREPLACE: - case ZPOOL_PROP_LISTSNAPS: - case ZPOOL_PROP_AUTOEXPAND: - case ZPOOL_PROP_AUTOTRIM: - error = nvpair_value_uint64(elem, &intval); - if (!error && intval > 1) - error = SET_ERROR(EINVAL); + fname = strchr(propname, '@') + 1; + if (zfeature_lookup_name(fname, NULL) != 0) { + error = SET_ERROR(EINVAL); break; + } - case ZPOOL_PROP_MULTIHOST: - error = nvpair_value_uint64(elem, &intval); - if (!error && intval > 1) - error = SET_ERROR(EINVAL); + has_feature = B_TRUE; + break; - if (!error) { - uint32_t hostid = zone_get_hostid(NULL); - if (hostid) - spa->spa_hostid = hostid; - else - 
error = SET_ERROR(ENOTSUP); - } + case ZPOOL_PROP_VERSION: + error = nvpair_value_uint64(elem, &intval); + if (!error && + (intval < spa_version(spa) || + intval > SPA_VERSION_BEFORE_FEATURES || + has_feature)) + error = SET_ERROR(EINVAL); + break; - break; + case ZPOOL_PROP_DELEGATION: + case ZPOOL_PROP_AUTOREPLACE: + case ZPOOL_PROP_LISTSNAPS: + case ZPOOL_PROP_AUTOEXPAND: + case ZPOOL_PROP_AUTOTRIM: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > 1) + error = SET_ERROR(EINVAL); + break; - case ZPOOL_PROP_BOOTFS: - /* - * If the pool version is less than SPA_VERSION_BOOTFS, - * or the pool is still being created (version == 0), - * the bootfs property cannot be set. - */ - if (spa_version(spa) < SPA_VERSION_BOOTFS) { - error = SET_ERROR(ENOTSUP); - break; - } + case ZPOOL_PROP_MULTIHOST: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > 1) + error = SET_ERROR(EINVAL); - /* - * Make sure the vdev config is bootable - */ - if (!vdev_is_bootable(spa->spa_root_vdev)) { + if (!error) { + uint32_t hostid = zone_get_hostid(NULL); + if (hostid) + spa->spa_hostid = hostid; + else error = SET_ERROR(ENOTSUP); - break; - } + } - reset_bootfs = 1; + break; - error = nvpair_value_string(elem, &strval); + case ZPOOL_PROP_BOOTFS: + /* + * If the pool version is less than SPA_VERSION_BOOTFS, + * or the pool is still being created (version == 0), + * the bootfs property cannot be set. 
+ */ + if (spa_version(spa) < SPA_VERSION_BOOTFS) { + error = SET_ERROR(ENOTSUP); + break; + } - if (!error) { - objset_t *os; - uint64_t propval; + /* + * Make sure the vdev config is bootable + */ + if (!vdev_is_bootable(spa->spa_root_vdev)) { + error = SET_ERROR(ENOTSUP); + break; + } - if (strval == NULL || strval[0] == '\0') { - objnum = zpool_prop_default_numeric( - ZPOOL_PROP_BOOTFS); - break; - } + reset_bootfs = 1; - error = dmu_objset_hold(strval, FTAG, &os); - if (error != 0) - break; + error = nvpair_value_string(elem, &strval); - /* - * Must be ZPL, and its property settings - * must be supported by GRUB (compression - * is not gzip, and large dnodes are not - * used). - */ + if (!error) { + objset_t *os; + uint64_t propval; - if (dmu_objset_type(os) != DMU_OST_ZFS) { - error = SET_ERROR(ENOTSUP); - } else if ((error = - dsl_prop_get_int_ds(dmu_objset_ds(os), - zfs_prop_to_name(ZFS_PROP_COMPRESSION), - &propval)) == 0 && - !BOOTFS_COMPRESS_VALID(propval)) { - error = SET_ERROR(ENOTSUP); - } else if ((error = - dsl_prop_get_int_ds(dmu_objset_ds(os), - zfs_prop_to_name(ZFS_PROP_DNODESIZE), - &propval)) == 0 && - propval != ZFS_DNSIZE_LEGACY) { - error = SET_ERROR(ENOTSUP); - } else { - objnum = dmu_objset_id(os); - } - dmu_objset_rele(os, FTAG); + if (strval == NULL || strval[0] == '\0') { + objnum = zpool_prop_default_numeric( + ZPOOL_PROP_BOOTFS); + break; } - break; - case ZPOOL_PROP_FAILUREMODE: - error = nvpair_value_uint64(elem, &intval); - if (!error && intval > ZIO_FAILURE_MODE_PANIC) - error = SET_ERROR(EINVAL); + error = dmu_objset_hold(strval, FTAG, &os); + if (error != 0) + break; /* - * This is a special case which only occurs when - * the pool has completely failed. This allows - * the user to change the in-core failmode property - * without syncing it out to disk (I/Os might - * currently be blocked). We do this by returning - * EIO to the caller (spa_prop_set) to trick it - * into thinking we encountered a property validation - * error. 
+ * Must be ZPL, and its property settings + * must be supported by GRUB (compression + * is not gzip, and large dnodes are not + * used). */ - if (!error && spa_suspended(spa)) { - spa->spa_failmode = intval; - error = SET_ERROR(EIO); - } - break; - - case ZPOOL_PROP_CACHEFILE: - if ((error = nvpair_value_string(elem, &strval)) != 0) - break; - - if (strval[0] == '\0') - break; - - if (strcmp(strval, "none") == 0) - break; - if (strval[0] != '/') { - error = SET_ERROR(EINVAL); - break; + if (dmu_objset_type(os) != DMU_OST_ZFS) { + error = SET_ERROR(ENOTSUP); + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), + zfs_prop_to_name(ZFS_PROP_COMPRESSION), + &propval)) == 0 && + !BOOTFS_COMPRESS_VALID(propval)) { + error = SET_ERROR(ENOTSUP); + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), + zfs_prop_to_name(ZFS_PROP_DNODESIZE), + &propval)) == 0 && + propval != ZFS_DNSIZE_LEGACY) { + error = SET_ERROR(ENOTSUP); + } else { + objnum = dmu_objset_id(os); } + dmu_objset_rele(os, FTAG); + } + break; - slash = strrchr(strval, '/'); - ASSERT(slash != NULL); + case ZPOOL_PROP_FAILUREMODE: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > ZIO_FAILURE_MODE_PANIC) + error = SET_ERROR(EINVAL); - if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || - strcmp(slash, "/..") == 0) - error = SET_ERROR(EINVAL); + /* + * This is a special case which only occurs when + * the pool has completely failed. This allows + * the user to change the in-core failmode property + * without syncing it out to disk (I/Os might + * currently be blocked). We do this by returning + * EIO to the caller (spa_prop_set) to trick it + * into thinking we encountered a property validation + * error. 
+ */ + if (!error && spa_suspended(spa)) { + spa->spa_failmode = intval; + error = SET_ERROR(EIO); + } + break; + + case ZPOOL_PROP_CACHEFILE: + if ((error = nvpair_value_string(elem, &strval)) != 0) break; - case ZPOOL_PROP_COMMENT: - if ((error = nvpair_value_string(elem, &strval)) != 0) - break; - for (check = strval; *check != '\0'; check++) { - if (!isprint(*check)) { - error = SET_ERROR(EINVAL); - break; - } - } - if (strlen(strval) > ZPROP_MAX_COMMENT) - error = SET_ERROR(E2BIG); + if (strval[0] == '\0') break; - case ZPOOL_PROP_DEDUPDITTO: - if (spa_version(spa) < SPA_VERSION_DEDUP) - error = SET_ERROR(ENOTSUP); - else - error = nvpair_value_uint64(elem, &intval); - if (error == 0 && - intval != 0 && intval < ZIO_DEDUPDITTO_MIN) - error = SET_ERROR(EINVAL); + if (strcmp(strval, "none") == 0) break; - default: + if (strval[0] != '/') { + error = SET_ERROR(EINVAL); break; } - if (error) + slash = strrchr(strval, '/'); + ASSERT(slash != NULL); + + if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || + strcmp(slash, "/..") == 0) + error = SET_ERROR(EINVAL); + break; + + case ZPOOL_PROP_COMMENT: + if ((error = nvpair_value_string(elem, &strval)) != 0) break; + for (check = strval; *check != '\0'; check++) { + if (!isprint(*check)) { + error = SET_ERROR(EINVAL); + break; + } + } + if (strlen(strval) > ZPROP_MAX_COMMENT) + error = SET_ERROR(E2BIG); + break; + + case ZPOOL_PROP_DEDUPDITTO: + if (spa_version(spa) < SPA_VERSION_DEDUP) + error = SET_ERROR(ENOTSUP); + else + error = nvpair_value_uint64(elem, &intval); + if (error == 0 && + intval != 0 && intval < ZIO_DEDUPDITTO_MIN) + error = SET_ERROR(EINVAL); + break; + + default: + break; } - if (!error && reset_bootfs) { - error = nvlist_remove(props, - zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); + if (error) + break; +} + +if (!error && reset_bootfs) { + error = nvlist_remove(props, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); - if (!error) { - error = nvlist_add_uint64(props, - 
zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); - } + if (!error) { + error = nvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); } +} - return (error); +return (error); } void spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) { - char *cachefile; - spa_config_dirent_t *dp; +char *cachefile; +spa_config_dirent_t *dp; - if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), - &cachefile) != 0) - return; +if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), + &cachefile) != 0) + return; - dp = kmem_alloc(sizeof (spa_config_dirent_t), - KM_SLEEP); +dp = kmem_alloc(sizeof (spa_config_dirent_t), + KM_SLEEP); - if (cachefile[0] == '\0') - dp->scd_path = spa_strdup(spa_config_path); - else if (strcmp(cachefile, "none") == 0) - dp->scd_path = NULL; - else - dp->scd_path = spa_strdup(cachefile); +if (cachefile[0] == '\0') + dp->scd_path = spa_strdup(spa_config_path); +else if (strcmp(cachefile, "none") == 0) + dp->scd_path = NULL; +else + dp->scd_path = spa_strdup(cachefile); - list_insert_head(&spa->spa_config_list, dp); - if (need_sync) - spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); +list_insert_head(&spa->spa_config_list, dp); +if (need_sync) + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } int spa_prop_set(spa_t *spa, nvlist_t *nvp) { - int error; - nvpair_t *elem = NULL; - boolean_t need_sync = B_FALSE; +int error; +nvpair_t *elem = NULL; +boolean_t need_sync = B_FALSE; - if ((error = spa_prop_validate(spa, nvp)) != 0) - return (error); +if ((error = spa_prop_validate(spa, nvp)) != 0) + return (error); - while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { - zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); +while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { + zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); - if (prop == ZPOOL_PROP_CACHEFILE || - prop == ZPOOL_PROP_ALTROOT || - prop == ZPOOL_PROP_READONLY) - continue; - - if (prop == ZPOOL_PROP_VERSION || 
prop == ZPOOL_PROP_INVAL) { - uint64_t ver; + if (prop == ZPOOL_PROP_CACHEFILE || + prop == ZPOOL_PROP_ALTROOT || + prop == ZPOOL_PROP_READONLY) + continue; - if (prop == ZPOOL_PROP_VERSION) { - VERIFY(nvpair_value_uint64(elem, &ver) == 0); - } else { - ASSERT(zpool_prop_feature(nvpair_name(elem))); - ver = SPA_VERSION_FEATURES; - need_sync = B_TRUE; - } + if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) { + uint64_t ver; - /* Save time if the version is already set. */ - if (ver == spa_version(spa)) - continue; + if (prop == ZPOOL_PROP_VERSION) { + VERIFY(nvpair_value_uint64(elem, &ver) == 0); + } else { + ASSERT(zpool_prop_feature(nvpair_name(elem))); + ver = SPA_VERSION_FEATURES; + need_sync = B_TRUE; + } - /* - * In addition to the pool directory object, we might - * create the pool properties object, the features for - * read object, the features for write object, or the - * feature descriptions object. - */ - error = dsl_sync_task(spa->spa_name, NULL, - spa_sync_version, &ver, - 6, ZFS_SPACE_CHECK_RESERVED); - if (error) - return (error); + /* Save time if the version is already set. */ + if (ver == spa_version(spa)) continue; - } - need_sync = B_TRUE; - break; + /* + * In addition to the pool directory object, we might + * create the pool properties object, the features for + * read object, the features for write object, or the + * feature descriptions object. + */ + error = dsl_sync_task(spa->spa_name, NULL, + spa_sync_version, &ver, + 6, ZFS_SPACE_CHECK_RESERVED); + if (error) + return (error); + continue; } - if (need_sync) { - return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, - nvp, 6, ZFS_SPACE_CHECK_RESERVED)); - } + need_sync = B_TRUE; + break; +} - return (0); +if (need_sync) { + return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, + nvp, 6, ZFS_SPACE_CHECK_RESERVED)); +} + +return (0); } /* - * If the bootfs property value is dsobj, clear it. - */ +* If the bootfs property value is dsobj, clear it. 
+*/ void spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) { - if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { - VERIFY(zap_remove(spa->spa_meta_objset, - spa->spa_pool_props_object, - zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); - spa->spa_bootfs = 0; - } +if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { + VERIFY(zap_remove(spa->spa_meta_objset, + spa->spa_pool_props_object, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); + spa->spa_bootfs = 0; +} } /*ARGSUSED*/ static int spa_change_guid_check(void *arg, dmu_tx_t *tx) { - ASSERTV(uint64_t *newguid = arg); - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - vdev_t *rvd = spa->spa_root_vdev; - uint64_t vdev_state; +ASSERTV(uint64_t *newguid = arg); +spa_t *spa = dmu_tx_pool(tx)->dp_spa; +vdev_t *rvd = spa->spa_root_vdev; +uint64_t vdev_state; - if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { - int error = (spa_has_checkpoint(spa)) ? - ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; - return (SET_ERROR(error)); - } +if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + int error = (spa_has_checkpoint(spa)) ? 
+ ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (SET_ERROR(error)); +} - spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - vdev_state = rvd->vdev_state; - spa_config_exit(spa, SCL_STATE, FTAG); +spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); +vdev_state = rvd->vdev_state; +spa_config_exit(spa, SCL_STATE, FTAG); - if (vdev_state != VDEV_STATE_HEALTHY) - return (SET_ERROR(ENXIO)); +if (vdev_state != VDEV_STATE_HEALTHY) + return (SET_ERROR(ENXIO)); - ASSERT3U(spa_guid(spa), !=, *newguid); +ASSERT3U(spa_guid(spa), !=, *newguid); - return (0); +return (0); } static void spa_change_guid_sync(void *arg, dmu_tx_t *tx) { - uint64_t *newguid = arg; - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - uint64_t oldguid; - vdev_t *rvd = spa->spa_root_vdev; +uint64_t *newguid = arg; +spa_t *spa = dmu_tx_pool(tx)->dp_spa; +uint64_t oldguid; +vdev_t *rvd = spa->spa_root_vdev; - oldguid = spa_guid(spa); +oldguid = spa_guid(spa); - spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - rvd->vdev_guid = *newguid; - rvd->vdev_guid_sum += (*newguid - oldguid); - vdev_config_dirty(rvd); - spa_config_exit(spa, SCL_STATE, FTAG); +spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); +rvd->vdev_guid = *newguid; +rvd->vdev_guid_sum += (*newguid - oldguid); +vdev_config_dirty(rvd); +spa_config_exit(spa, SCL_STATE, FTAG); - spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", - oldguid, *newguid); +spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", + oldguid, *newguid); } /* - * Change the GUID for the pool. This is done so that we can later - * re-import a pool built from a clone of our own vdevs. We will modify - * the root vdev's guid, our own pool guid, and then mark all of our - * vdevs dirty. Note that we must make sure that all our vdevs are - * online when we do this, or else any vdevs that weren't present - * would be orphaned from our pool. We are also going to issue a - * sysevent to update any watchers. 
- */ +* Change the GUID for the pool. This is done so that we can later +* re-import a pool built from a clone of our own vdevs. We will modify +* the root vdev's guid, our own pool guid, and then mark all of our +* vdevs dirty. Note that we must make sure that all our vdevs are +* online when we do this, or else any vdevs that weren't present +* would be orphaned from our pool. We are also going to issue a +* sysevent to update any watchers. +*/ int spa_change_guid(spa_t *spa) { - int error; - uint64_t guid; +int error; +uint64_t guid; - mutex_enter(&spa->spa_vdev_top_lock); - mutex_enter(&spa_namespace_lock); - guid = spa_generate_guid(NULL); +mutex_enter(&spa->spa_vdev_top_lock); +mutex_enter(&spa_namespace_lock); +guid = spa_generate_guid(NULL); - error = dsl_sync_task(spa->spa_name, spa_change_guid_check, - spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); +error = dsl_sync_task(spa->spa_name, spa_change_guid_check, + spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); - if (error == 0) { - spa_write_cachefile(spa, B_FALSE, B_TRUE); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); - } +if (error == 0) { + spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); +} - mutex_exit(&spa_namespace_lock); - mutex_exit(&spa->spa_vdev_top_lock); +mutex_exit(&spa_namespace_lock); +mutex_exit(&spa->spa_vdev_top_lock); - return (error); +return (error); } /* - * ========================================================================== - * SPA state manipulation (open/create/destroy/import/export) - * ========================================================================== - */ +* ========================================================================== +* SPA state manipulation (open/create/destroy/import/export) +* ========================================================================== +*/ static int spa_error_entry_compare(const void *a, const void *b) { - const spa_error_entry_t *sa = (const 
spa_error_entry_t *)a; - const spa_error_entry_t *sb = (const spa_error_entry_t *)b; - int ret; +const spa_error_entry_t *sa = (const spa_error_entry_t *)a; +const spa_error_entry_t *sb = (const spa_error_entry_t *)b; +int ret; - ret = memcmp(&sa->se_bookmark, &sb->se_bookmark, - sizeof (zbookmark_phys_t)); +ret = memcmp(&sa->se_bookmark, &sb->se_bookmark, + sizeof (zbookmark_phys_t)); - return (AVL_ISIGN(ret)); +return (AVL_ISIGN(ret)); } /* - * Utility function which retrieves copies of the current logs and - * re-initializes them in the process. - */ +* Utility function which retrieves copies of the current logs and +* re-initializes them in the process. +*/ void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) { - ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); +ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); - bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); - bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); +bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); +bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); - avl_create(&spa->spa_errlist_scrub, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); - avl_create(&spa->spa_errlist_last, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_scrub, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_last, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); } static void spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { - const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; - enum zti_modes mode = ztip->zti_mode; - uint_t value = ztip->zti_value; - uint_t count = ztip->zti_count; - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - uint_t flags = 0; - boolean_t batch = B_FALSE; - - if (mode == ZTI_MODE_NULL) { - tqs->stqs_count = 0; - 
tqs->stqs_taskq = NULL; - return; - } +const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; +enum zti_modes mode = ztip->zti_mode; +uint_t value = ztip->zti_value; +uint_t count = ztip->zti_count; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +uint_t flags = 0; +boolean_t batch = B_FALSE; - ASSERT3U(count, >, 0); +if (mode == ZTI_MODE_NULL) { + tqs->stqs_count = 0; + tqs->stqs_taskq = NULL; + return; +} - tqs->stqs_count = count; - tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); +ASSERT3U(count, >, 0); - switch (mode) { - case ZTI_MODE_FIXED: - ASSERT3U(value, >=, 1); - value = MAX(value, 1); - flags |= TASKQ_DYNAMIC; - break; +tqs->stqs_count = count; +tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); - case ZTI_MODE_BATCH: - batch = B_TRUE; - flags |= TASKQ_THREADS_CPU_PCT; - value = MIN(zio_taskq_batch_pct, 100); - break; +switch (mode) { +case ZTI_MODE_FIXED: + ASSERT3U(value, >=, 1); + value = MAX(value, 1); + flags |= TASKQ_DYNAMIC; + break; - default: - panic("unrecognized mode for %s_%s taskq (%u:%u) in " - "spa_activate()", - zio_type_name[t], zio_taskq_types[q], mode, value); - break; - } +case ZTI_MODE_BATCH: + batch = B_TRUE; + flags |= TASKQ_THREADS_CPU_PCT; + value = MIN(zio_taskq_batch_pct, 100); + break; - for (uint_t i = 0; i < count; i++) { - taskq_t *tq; - char name[32]; +default: + panic("unrecognized mode for %s_%s taskq (%u:%u) in " + "spa_activate()", + zio_type_name[t], zio_taskq_types[q], mode, value); + break; +} - (void) snprintf(name, sizeof (name), "%s_%s", - zio_type_name[t], zio_taskq_types[q]); +for (uint_t i = 0; i < count; i++) { + taskq_t *tq; + char name[32]; - if (zio_taskq_sysdc && spa->spa_proc != &p0) { - if (batch) - flags |= TASKQ_DC_BATCH; + (void) snprintf(name, sizeof (name), "%s_%s", + zio_type_name[t], zio_taskq_types[q]); - tq = taskq_create_sysdc(name, value, 50, INT_MAX, - spa->spa_proc, zio_taskq_basedc, flags); - } else { - pri_t pri = maxclsyspri; - /* - * The write issue 
taskq can be extremely CPU - * intensive. Run it at slightly less important - * priority than the other taskqs. Under Linux this - * means incrementing the priority value on platforms - * like illumos it should be decremented. - */ - if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) - pri++; + if (zio_taskq_sysdc && spa->spa_proc != &p0) { + if (batch) + flags |= TASKQ_DC_BATCH; - tq = taskq_create_proc(name, value, pri, 50, - INT_MAX, spa->spa_proc, flags); - } + tq = taskq_create_sysdc(name, value, 50, INT_MAX, + spa->spa_proc, zio_taskq_basedc, flags); + } else { + pri_t pri = maxclsyspri; + /* + * The write issue taskq can be extremely CPU + * intensive. Run it at slightly less important + * priority than the other taskqs. Under Linux this + * means incrementing the priority value on platforms + * like illumos it should be decremented. + */ + if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) + pri++; - tqs->stqs_taskq[i] = tq; + tq = taskq_create_proc(name, value, pri, 50, + INT_MAX, spa->spa_proc, flags); } + + tqs->stqs_taskq[i] = tq; +} } static void spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - if (tqs->stqs_taskq == NULL) { - ASSERT3U(tqs->stqs_count, ==, 0); - return; - } +if (tqs->stqs_taskq == NULL) { + ASSERT3U(tqs->stqs_count, ==, 0); + return; +} - for (uint_t i = 0; i < tqs->stqs_count; i++) { - ASSERT3P(tqs->stqs_taskq[i], !=, NULL); - taskq_destroy(tqs->stqs_taskq[i]); - } +for (uint_t i = 0; i < tqs->stqs_count; i++) { + ASSERT3P(tqs->stqs_taskq[i], !=, NULL); + taskq_destroy(tqs->stqs_taskq[i]); +} - kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); - tqs->stqs_taskq = NULL; +kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); +tqs->stqs_taskq = NULL; } /* - * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. 
- * Note that a type may have multiple discrete taskqs to avoid lock contention - * on the taskq itself. In that case we choose which taskq at random by using - * the low bits of gethrtime(). - */ +* Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. +* Note that a type may have multiple discrete taskqs to avoid lock contention +* on the taskq itself. In that case we choose which taskq at random by using +* the low bits of gethrtime(). +*/ void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) +task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) { - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - taskq_t *tq; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +taskq_t *tq; - ASSERT3P(tqs->stqs_taskq, !=, NULL); - ASSERT3U(tqs->stqs_count, !=, 0); +ASSERT3P(tqs->stqs_taskq, !=, NULL); +ASSERT3U(tqs->stqs_count, !=, 0); - if (tqs->stqs_count == 1) { - tq = tqs->stqs_taskq[0]; - } else { - tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; - } +if (tqs->stqs_count == 1) { + tq = tqs->stqs_taskq[0]; +} else { + tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; +} - taskq_dispatch_ent(tq, func, arg, flags, ent); +taskq_dispatch_ent(tq, func, arg, flags, ent); } /* - * Same as spa_taskq_dispatch_ent() but block on the task until completion. - */ +* Same as spa_taskq_dispatch_ent() but block on the task until completion. 
+*/ void spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags) +task_func_t *func, void *arg, uint_t flags) { - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - taskq_t *tq; - taskqid_t id; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +taskq_t *tq; +taskqid_t id; - ASSERT3P(tqs->stqs_taskq, !=, NULL); - ASSERT3U(tqs->stqs_count, !=, 0); +ASSERT3P(tqs->stqs_taskq, !=, NULL); +ASSERT3U(tqs->stqs_count, !=, 0); - if (tqs->stqs_count == 1) { - tq = tqs->stqs_taskq[0]; - } else { - tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; - } +if (tqs->stqs_count == 1) { + tq = tqs->stqs_taskq[0]; +} else { + tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; +} - id = taskq_dispatch(tq, func, arg, flags); - if (id) - taskq_wait_id(tq, id); +id = taskq_dispatch(tq, func, arg, flags); +if (id) + taskq_wait_id(tq, id); } static void spa_create_zio_taskqs(spa_t *spa) { - for (int t = 0; t < ZIO_TYPES; t++) { - for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { - spa_taskqs_init(spa, t, q); - } +for (int t = 0; t < ZIO_TYPES; t++) { + for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { + spa_taskqs_init(spa, t, q); } } +} /* - * Disabled until spa_thread() can be adapted for Linux. - */ +* Disabled until spa_thread() can be adapted for Linux. 
+*/ #undef HAVE_SPA_THREAD #if defined(_KERNEL) && defined(HAVE_SPA_THREAD) static void spa_thread(void *arg) { - psetid_t zio_taskq_psrset_bind = PS_NONE; - callb_cpr_t cprinfo; - - spa_t *spa = arg; - user_t *pu = PTOU(curproc); +psetid_t zio_taskq_psrset_bind = PS_NONE; +callb_cpr_t cprinfo; - CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, - spa->spa_name); +spa_t *spa = arg; +user_t *pu = PTOU(curproc); - ASSERT(curproc != &p0); - (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), - "zpool-%s", spa->spa_name); - (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); +CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, + spa->spa_name); - /* bind this thread to the requested psrset */ - if (zio_taskq_psrset_bind != PS_NONE) { - pool_lock(); - mutex_enter(&cpu_lock); - mutex_enter(&pidlock); - mutex_enter(&curproc->p_lock); +ASSERT(curproc != &p0); +(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), + "zpool-%s", spa->spa_name); +(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); - if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, - 0, NULL, NULL) == 0) { - curthread->t_bind_pset = zio_taskq_psrset_bind; - } else { - cmn_err(CE_WARN, - "Couldn't bind process for zfs pool \"%s\" to " - "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); - } +/* bind this thread to the requested psrset */ +if (zio_taskq_psrset_bind != PS_NONE) { + pool_lock(); + mutex_enter(&cpu_lock); + mutex_enter(&pidlock); + mutex_enter(&curproc->p_lock); - mutex_exit(&curproc->p_lock); - mutex_exit(&pidlock); - mutex_exit(&cpu_lock); - pool_unlock(); + if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, + 0, NULL, NULL) == 0) { + curthread->t_bind_pset = zio_taskq_psrset_bind; + } else { + cmn_err(CE_WARN, + "Couldn't bind process for zfs pool \"%s\" to " + "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); } - if (zio_taskq_sysdc) { - sysdc_thread_enter(curthread, 100, 0); - } + mutex_exit(&curproc->p_lock); + 
mutex_exit(&pidlock); + mutex_exit(&cpu_lock); + pool_unlock(); +} - spa->spa_proc = curproc; - spa->spa_did = curthread->t_did; +if (zio_taskq_sysdc) { + sysdc_thread_enter(curthread, 100, 0); +} - spa_create_zio_taskqs(spa); +spa->spa_proc = curproc; +spa->spa_did = curthread->t_did; - mutex_enter(&spa->spa_proc_lock); - ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); +spa_create_zio_taskqs(spa); - spa->spa_proc_state = SPA_PROC_ACTIVE; - cv_broadcast(&spa->spa_proc_cv); +mutex_enter(&spa->spa_proc_lock); +ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); - CALLB_CPR_SAFE_BEGIN(&cprinfo); - while (spa->spa_proc_state == SPA_PROC_ACTIVE) - cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); - CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); +spa->spa_proc_state = SPA_PROC_ACTIVE; +cv_broadcast(&spa->spa_proc_cv); - ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); - spa->spa_proc_state = SPA_PROC_GONE; - spa->spa_proc = &p0; - cv_broadcast(&spa->spa_proc_cv); - CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ +CALLB_CPR_SAFE_BEGIN(&cprinfo); +while (spa->spa_proc_state == SPA_PROC_ACTIVE) + cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); +CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); - mutex_enter(&curproc->p_lock); - lwp_exit(); +ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); +spa->spa_proc_state = SPA_PROC_GONE; +spa->spa_proc = &p0; +cv_broadcast(&spa->spa_proc_cv); +CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ + +mutex_enter(&curproc->p_lock); +lwp_exit(); } #endif /* - * Activate an uninitialized pool. - */ +* Activate an uninitialized pool. 
+*/ static void spa_activate(spa_t *spa, int mode) { - ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); +ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); - spa->spa_state = POOL_STATE_ACTIVE; - spa->spa_mode = mode; +spa->spa_state = POOL_STATE_ACTIVE; +spa->spa_mode = mode; - spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); - spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); - spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops); - spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops); - /* Try to create a covering process */ - mutex_enter(&spa->spa_proc_lock); - ASSERT(spa->spa_proc_state == SPA_PROC_NONE); - ASSERT(spa->spa_proc == &p0); - spa->spa_did = 0; +/* Try to create a covering process */ +mutex_enter(&spa->spa_proc_lock); +ASSERT(spa->spa_proc_state == SPA_PROC_NONE); +ASSERT(spa->spa_proc == &p0); +spa->spa_did = 0; #ifdef HAVE_SPA_THREAD - /* Only create a process if we're going to be around a while. */ - if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { - if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, - NULL, 0) == 0) { - spa->spa_proc_state = SPA_PROC_CREATED; - while (spa->spa_proc_state == SPA_PROC_CREATED) { - cv_wait(&spa->spa_proc_cv, - &spa->spa_proc_lock); - } - ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); - ASSERT(spa->spa_proc != &p0); - ASSERT(spa->spa_did != 0); - } else { +/* Only create a process if we're going to be around a while. 
*/ +if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { + if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, + NULL, 0) == 0) { + spa->spa_proc_state = SPA_PROC_CREATED; + while (spa->spa_proc_state == SPA_PROC_CREATED) { + cv_wait(&spa->spa_proc_cv, + &spa->spa_proc_lock); + } + ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); + ASSERT(spa->spa_proc != &p0); + ASSERT(spa->spa_did != 0); + } else { #ifdef _KERNEL - cmn_err(CE_WARN, - "Couldn't create process for zfs pool \"%s\"\n", - spa->spa_name); + cmn_err(CE_WARN, + "Couldn't create process for zfs pool \"%s\"\n", + spa->spa_name); #endif - } } +} #endif /* HAVE_SPA_THREAD */ - mutex_exit(&spa->spa_proc_lock); +mutex_exit(&spa->spa_proc_lock); - /* If we didn't create a process, we need to create our taskqs. */ - if (spa->spa_proc == &p0) { - spa_create_zio_taskqs(spa); - } +/* If we didn't create a process, we need to create our taskqs. */ +if (spa->spa_proc == &p0) { + spa_create_zio_taskqs(spa); +} - for (size_t i = 0; i < TXG_SIZE; i++) { - spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL); - } +for (size_t i = 0; i < TXG_SIZE; i++) { + spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL); +} - list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), - offsetof(vdev_t, vdev_config_dirty_node)); - list_create(&spa->spa_evicting_os_list, sizeof (objset_t), - offsetof(objset_t, os_evicting_node)); - list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), - offsetof(vdev_t, vdev_state_dirty_node)); +list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), + offsetof(vdev_t, vdev_config_dirty_node)); +list_create(&spa->spa_evicting_os_list, sizeof (objset_t), + offsetof(objset_t, os_evicting_node)); +list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), + offsetof(vdev_t, vdev_state_dirty_node)); - txg_list_create(&spa->spa_vdev_txg_list, spa, - offsetof(struct vdev, vdev_txg_node)); +txg_list_create(&spa->spa_vdev_txg_list, spa, + offsetof(struct 
vdev, vdev_txg_node)); - avl_create(&spa->spa_errlist_scrub, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); - avl_create(&spa->spa_errlist_last, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_scrub, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_last, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); - spa_keystore_init(&spa->spa_keystore); +spa_keystore_init(&spa->spa_keystore); - /* - * This taskq is used to perform zvol-minor-related tasks - * asynchronously. This has several advantages, including easy - * resolution of various deadlocks (zfsonlinux bug #3681). - * - * The taskq must be single threaded to ensure tasks are always - * processed in the order in which they were dispatched. - * - * A taskq per pool allows one to keep the pools independent. - * This way if one pool is suspended, it will not impact another. - * - * The preferred location to dispatch a zvol minor task is a sync - * task. In this context, there is easy access to the spa_t and minimal - * error handling is required because the sync task must succeed. - */ - spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, - 1, INT_MAX, 0); +/* + * This taskq is used to perform zvol-minor-related tasks + * asynchronously. This has several advantages, including easy + * resolution of various deadlocks (zfsonlinux bug #3681). + * + * The taskq must be single threaded to ensure tasks are always + * processed in the order in which they were dispatched. + * + * A taskq per pool allows one to keep the pools independent. + * This way if one pool is suspended, it will not impact another. + * + * The preferred location to dispatch a zvol minor task is a sync + * task. 
In this context, there is easy access to the spa_t and minimal + * error handling is required because the sync task must succeed. + */ +spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, + 1, INT_MAX, 0); - /* - * Taskq dedicated to prefetcher threads: this is used to prevent the - * pool traverse code from monopolizing the global (and limited) - * system_taskq by inappropriately scheduling long running tasks on it. - */ - spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus, - defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); +/* + * Taskq dedicated to prefetcher threads: this is used to prevent the + * pool traverse code from monopolizing the global (and limited) + * system_taskq by inappropriately scheduling long running tasks on it. + */ +spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); - /* - * The taskq to upgrade datasets in this pool. Currently used by - * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA. - */ - spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus, - defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); +/* + * The taskq to upgrade datasets in this pool. Currently used by + * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA. + */ +spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); } /* - * Opposite of spa_activate(). - */ +* Opposite of spa_activate(). 
+*/ static void spa_deactivate(spa_t *spa) { - ASSERT(spa->spa_sync_on == B_FALSE); - ASSERT(spa->spa_dsl_pool == NULL); - ASSERT(spa->spa_root_vdev == NULL); - ASSERT(spa->spa_async_zio_root == NULL); - ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); - - spa_evicting_os_wait(spa); +ASSERT(spa->spa_sync_on == B_FALSE); +ASSERT(spa->spa_dsl_pool == NULL); +ASSERT(spa->spa_root_vdev == NULL); +ASSERT(spa->spa_async_zio_root == NULL); +ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); - if (spa->spa_zvol_taskq) { - taskq_destroy(spa->spa_zvol_taskq); - spa->spa_zvol_taskq = NULL; - } +spa_evicting_os_wait(spa); - if (spa->spa_prefetch_taskq) { - taskq_destroy(spa->spa_prefetch_taskq); - spa->spa_prefetch_taskq = NULL; - } +if (spa->spa_zvol_taskq) { + taskq_destroy(spa->spa_zvol_taskq); + spa->spa_zvol_taskq = NULL; +} - if (spa->spa_upgrade_taskq) { - taskq_destroy(spa->spa_upgrade_taskq); - spa->spa_upgrade_taskq = NULL; - } +if (spa->spa_prefetch_taskq) { + taskq_destroy(spa->spa_prefetch_taskq); + spa->spa_prefetch_taskq = NULL; +} - txg_list_destroy(&spa->spa_vdev_txg_list); +if (spa->spa_upgrade_taskq) { + taskq_destroy(spa->spa_upgrade_taskq); + spa->spa_upgrade_taskq = NULL; +} - list_destroy(&spa->spa_config_dirty_list); - list_destroy(&spa->spa_evicting_os_list); - list_destroy(&spa->spa_state_dirty_list); +txg_list_destroy(&spa->spa_vdev_txg_list); - taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); +list_destroy(&spa->spa_config_dirty_list); +list_destroy(&spa->spa_evicting_os_list); +list_destroy(&spa->spa_state_dirty_list); - for (int t = 0; t < ZIO_TYPES; t++) { - for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { - spa_taskqs_fini(spa, t, q); - } - } +taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); - for (size_t i = 0; i < TXG_SIZE; i++) { - ASSERT3P(spa->spa_txg_zio[i], !=, NULL); - VERIFY0(zio_wait(spa->spa_txg_zio[i])); - spa->spa_txg_zio[i] = NULL; +for (int t = 0; t < ZIO_TYPES; t++) { + for (int q = 0; q < ZIO_TASKQ_TYPES; 
q++) { + spa_taskqs_fini(spa, t, q); } +} - metaslab_class_destroy(spa->spa_normal_class); - spa->spa_normal_class = NULL; +for (size_t i = 0; i < TXG_SIZE; i++) { + ASSERT3P(spa->spa_txg_zio[i], !=, NULL); + VERIFY0(zio_wait(spa->spa_txg_zio[i])); + spa->spa_txg_zio[i] = NULL; +} - metaslab_class_destroy(spa->spa_log_class); - spa->spa_log_class = NULL; +metaslab_class_destroy(spa->spa_normal_class); +spa->spa_normal_class = NULL; - metaslab_class_destroy(spa->spa_special_class); - spa->spa_special_class = NULL; +metaslab_class_destroy(spa->spa_log_class); +spa->spa_log_class = NULL; - metaslab_class_destroy(spa->spa_dedup_class); - spa->spa_dedup_class = NULL; +metaslab_class_destroy(spa->spa_special_class); +spa->spa_special_class = NULL; - /* - * If this was part of an import or the open otherwise failed, we may - * still have errors left in the queues. Empty them just in case. - */ - spa_errlog_drain(spa); - avl_destroy(&spa->spa_errlist_scrub); - avl_destroy(&spa->spa_errlist_last); +metaslab_class_destroy(spa->spa_dedup_class); +spa->spa_dedup_class = NULL; - spa_keystore_fini(&spa->spa_keystore); +/* + * If this was part of an import or the open otherwise failed, we may + * still have errors left in the queues. Empty them just in case. 
+ */ +spa_errlog_drain(spa); +avl_destroy(&spa->spa_errlist_scrub); +avl_destroy(&spa->spa_errlist_last); - spa->spa_state = POOL_STATE_UNINITIALIZED; +spa_keystore_fini(&spa->spa_keystore); - mutex_enter(&spa->spa_proc_lock); - if (spa->spa_proc_state != SPA_PROC_NONE) { - ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); - spa->spa_proc_state = SPA_PROC_DEACTIVATE; - cv_broadcast(&spa->spa_proc_cv); - while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { - ASSERT(spa->spa_proc != &p0); - cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); - } - ASSERT(spa->spa_proc_state == SPA_PROC_GONE); - spa->spa_proc_state = SPA_PROC_NONE; - } - ASSERT(spa->spa_proc == &p0); - mutex_exit(&spa->spa_proc_lock); +spa->spa_state = POOL_STATE_UNINITIALIZED; - /* - * We want to make sure spa_thread() has actually exited the ZFS - * module, so that the module can't be unloaded out from underneath - * it. - */ - if (spa->spa_did != 0) { - thread_join(spa->spa_did); - spa->spa_did = 0; +mutex_enter(&spa->spa_proc_lock); +if (spa->spa_proc_state != SPA_PROC_NONE) { + ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); + spa->spa_proc_state = SPA_PROC_DEACTIVATE; + cv_broadcast(&spa->spa_proc_cv); + while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { + ASSERT(spa->spa_proc != &p0); + cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } + ASSERT(spa->spa_proc_state == SPA_PROC_GONE); + spa->spa_proc_state = SPA_PROC_NONE; } +ASSERT(spa->spa_proc == &p0); +mutex_exit(&spa->spa_proc_lock); /* - * Verify a pool configuration, and construct the vdev tree appropriately. This - * will create all the necessary vdevs in the appropriate layout, with each vdev - * in the CLOSED state. This will prep the pool before open/creation/import. - * All vdev validation is done by the vdev_alloc() routine. + * We want to make sure spa_thread() has actually exited the ZFS + * module, so that the module can't be unloaded out from underneath + * it. 
*/ +if (spa->spa_did != 0) { + thread_join(spa->spa_did); + spa->spa_did = 0; +} +} + +/* +* Verify a pool configuration, and construct the vdev tree appropriately. This +* will create all the necessary vdevs in the appropriate layout, with each vdev +* in the CLOSED state. This will prep the pool before open/creation/import. +* All vdev validation is done by the vdev_alloc() routine. +*/ static int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, - uint_t id, int atype) +uint_t id, int atype) { - nvlist_t **child; - uint_t children; - int error; +nvlist_t **child; +uint_t children; +int error; - if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) - return (error); +if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) + return (error); - if ((*vdp)->vdev_ops->vdev_op_leaf) - return (0); +if ((*vdp)->vdev_ops->vdev_op_leaf) + return (0); - error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children); +error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); - if (error == ENOENT) - return (0); +if (error == ENOENT) + return (0); - if (error) { +if (error) { + vdev_free(*vdp); + *vdp = NULL; + return (SET_ERROR(EINVAL)); +} + +for (int c = 0; c < children; c++) { + vdev_t *vd; + if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, + atype)) != 0) { vdev_free(*vdp); *vdp = NULL; - return (SET_ERROR(EINVAL)); - } - - for (int c = 0; c < children; c++) { - vdev_t *vd; - if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, - atype)) != 0) { - vdev_free(*vdp); - *vdp = NULL; - return (error); - } + return (error); } +} - ASSERT(*vdp != NULL); +ASSERT(*vdp != NULL); - return (0); +return (0); } /* - * Opposite of spa_load(). - */ +* Opposite of spa_load(). 
+*/ static void spa_unload(spa_t *spa) { - int i; +int i; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa_import_progress_remove(spa_guid(spa)); - spa_load_note(spa, "UNLOADING"); +spa_import_progress_remove(spa_guid(spa)); +spa_load_note(spa, "UNLOADING"); - /* - * Stop async tasks. - */ - spa_async_suspend(spa); +/* + * Stop async tasks. + */ +spa_async_suspend(spa); - if (spa->spa_root_vdev) { - vdev_t *root_vdev = spa->spa_root_vdev; - vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE); - vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); - vdev_autotrim_stop_all(spa); - } +if (spa->spa_root_vdev) { + vdev_t *root_vdev = spa->spa_root_vdev; + vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE); + vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); + vdev_autotrim_stop_all(spa); +} - /* - * Stop syncing. - */ - if (spa->spa_sync_on) { - txg_sync_stop(spa->spa_dsl_pool); - spa->spa_sync_on = B_FALSE; - } +/* + * Stop syncing. + */ +if (spa->spa_sync_on) { + txg_sync_stop(spa->spa_dsl_pool); + spa->spa_sync_on = B_FALSE; +} - /* - * Even though vdev_free() also calls vdev_metaslab_fini, we need - * to call it earlier, before we wait for async i/o to complete. - * This ensures that there is no async metaslab prefetching, by - * calling taskq_wait(mg_taskq). - */ - if (spa->spa_root_vdev != NULL) { - spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); - for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) - vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]); - spa_config_exit(spa, SCL_ALL, spa); - } +/* + * Even though vdev_free() also calls vdev_metaslab_fini, we need + * to call it earlier, before we wait for async i/o to complete. + * This ensures that there is no async metaslab prefetching, by + * calling taskq_wait(mg_taskq). 
+ */ +if (spa->spa_root_vdev != NULL) { + spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); + for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) + vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]); + spa_config_exit(spa, SCL_ALL, spa); +} - if (spa->spa_mmp.mmp_thread) - mmp_thread_stop(spa); +if (spa->spa_mmp.mmp_thread) + mmp_thread_stop(spa); - /* - * Wait for any outstanding async I/O to complete. - */ - if (spa->spa_async_zio_root != NULL) { - for (int i = 0; i < max_ncpus; i++) - (void) zio_wait(spa->spa_async_zio_root[i]); - kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); - spa->spa_async_zio_root = NULL; - } +/* + * Wait for any outstanding async I/O to complete. + */ +if (spa->spa_async_zio_root != NULL) { + for (int i = 0; i < max_ncpus; i++) + (void) zio_wait(spa->spa_async_zio_root[i]); + kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); + spa->spa_async_zio_root = NULL; +} - if (spa->spa_vdev_removal != NULL) { - spa_vdev_removal_destroy(spa->spa_vdev_removal); - spa->spa_vdev_removal = NULL; - } +/* XXX move to spa_destroy_aux_threads() once it's upstream */ +if (spa->spa_raidz_expand_zthr != NULL) { + zthr_destroy(spa->spa_raidz_expand_zthr); + spa->spa_raidz_expand_zthr = NULL; +} - if (spa->spa_condense_zthr != NULL) { - zthr_destroy(spa->spa_condense_zthr); - spa->spa_condense_zthr = NULL; - } +if (spa->spa_vdev_removal != NULL) { + spa_vdev_removal_destroy(spa->spa_vdev_removal); + spa->spa_vdev_removal = NULL; +} - if (spa->spa_checkpoint_discard_zthr != NULL) { - zthr_destroy(spa->spa_checkpoint_discard_zthr); - spa->spa_checkpoint_discard_zthr = NULL; - } +if (spa->spa_condense_zthr != NULL) { + zthr_destroy(spa->spa_condense_zthr); + spa->spa_condense_zthr = NULL; +} - spa_condense_fini(spa); +if (spa->spa_checkpoint_discard_zthr != NULL) { + zthr_destroy(spa->spa_checkpoint_discard_zthr); + spa->spa_checkpoint_discard_zthr = NULL; +} - bpobj_close(&spa->spa_deferred_bpobj); +spa_condense_fini(spa); 
- spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); +bpobj_close(&spa->spa_deferred_bpobj); - /* - * Close all vdevs. - */ - if (spa->spa_root_vdev) - vdev_free(spa->spa_root_vdev); - ASSERT(spa->spa_root_vdev == NULL); +spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); - /* - * Close the dsl pool. - */ - if (spa->spa_dsl_pool) { - dsl_pool_close(spa->spa_dsl_pool); - spa->spa_dsl_pool = NULL; - spa->spa_meta_objset = NULL; - } +/* + * Close all vdevs. + */ +if (spa->spa_root_vdev) + vdev_free(spa->spa_root_vdev); +ASSERT(spa->spa_root_vdev == NULL); - ddt_unload(spa); +/* + * Close the dsl pool. + */ +if (spa->spa_dsl_pool) { + dsl_pool_close(spa->spa_dsl_pool); + spa->spa_dsl_pool = NULL; + spa->spa_meta_objset = NULL; +} - /* - * Drop and purge level 2 cache - */ - spa_l2cache_drop(spa); +ddt_unload(spa); - for (i = 0; i < spa->spa_spares.sav_count; i++) - vdev_free(spa->spa_spares.sav_vdevs[i]); - if (spa->spa_spares.sav_vdevs) { - kmem_free(spa->spa_spares.sav_vdevs, - spa->spa_spares.sav_count * sizeof (void *)); - spa->spa_spares.sav_vdevs = NULL; - } - if (spa->spa_spares.sav_config) { - nvlist_free(spa->spa_spares.sav_config); - spa->spa_spares.sav_config = NULL; - } - spa->spa_spares.sav_count = 0; +/* + * Drop and purge level 2 cache + */ +spa_l2cache_drop(spa); - for (i = 0; i < spa->spa_l2cache.sav_count; i++) { - vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); - vdev_free(spa->spa_l2cache.sav_vdevs[i]); - } - if (spa->spa_l2cache.sav_vdevs) { - kmem_free(spa->spa_l2cache.sav_vdevs, - spa->spa_l2cache.sav_count * sizeof (void *)); - spa->spa_l2cache.sav_vdevs = NULL; - } - if (spa->spa_l2cache.sav_config) { - nvlist_free(spa->spa_l2cache.sav_config); - spa->spa_l2cache.sav_config = NULL; - } - spa->spa_l2cache.sav_count = 0; +for (i = 0; i < spa->spa_spares.sav_count; i++) + vdev_free(spa->spa_spares.sav_vdevs[i]); +if (spa->spa_spares.sav_vdevs) { + kmem_free(spa->spa_spares.sav_vdevs, + spa->spa_spares.sav_count * sizeof (void *)); + 
spa->spa_spares.sav_vdevs = NULL; +} +if (spa->spa_spares.sav_config) { + nvlist_free(spa->spa_spares.sav_config); + spa->spa_spares.sav_config = NULL; +} +spa->spa_spares.sav_count = 0; - spa->spa_async_suspended = 0; +for (i = 0; i < spa->spa_l2cache.sav_count; i++) { + vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); + vdev_free(spa->spa_l2cache.sav_vdevs[i]); +} +if (spa->spa_l2cache.sav_vdevs) { + kmem_free(spa->spa_l2cache.sav_vdevs, + spa->spa_l2cache.sav_count * sizeof (void *)); + spa->spa_l2cache.sav_vdevs = NULL; +} +if (spa->spa_l2cache.sav_config) { + nvlist_free(spa->spa_l2cache.sav_config); + spa->spa_l2cache.sav_config = NULL; +} +spa->spa_l2cache.sav_count = 0; - spa->spa_indirect_vdevs_loaded = B_FALSE; +spa->spa_async_suspended = 0; - if (spa->spa_comment != NULL) { - spa_strfree(spa->spa_comment); - spa->spa_comment = NULL; - } +spa->spa_indirect_vdevs_loaded = B_FALSE; - spa_config_exit(spa, SCL_ALL, spa); +if (spa->spa_comment != NULL) { + spa_strfree(spa->spa_comment); + spa->spa_comment = NULL; } -/* - * Load (or re-load) the current list of vdevs describing the active spares for - * this pool. When this is called, we have some form of basic information in - * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and - * then re-generate a more complete list including status information. - */ +spa->spa_raidz_expand = NULL; + +spa_config_exit(spa, SCL_ALL, spa); +} + +/* +* Load (or re-load) the current list of vdevs describing the active spares for +* this pool. When this is called, we have some form of basic information in +* 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and +* then re-generate a more complete list including status information. 
+*/ void spa_load_spares(spa_t *spa) { - nvlist_t **spares; - uint_t nspares; - int i; - vdev_t *vd, *tvd; +nvlist_t **spares; +uint_t nspares; +int i; +vdev_t *vd, *tvd; #ifndef _KERNEL - /* - * zdb opens both the current state of the pool and the - * checkpointed state (if present), with a different spa_t. - * - * As spare vdevs are shared among open pools, we skip loading - * them when we load the checkpointed state of the pool. - */ - if (!spa_writeable(spa)) - return; +/* + * zdb opens both the current state of the pool and the + * checkpointed state (if present), with a different spa_t. + * + * As spare vdevs are shared among open pools, we skip loading + * them when we load the checkpointed state of the pool. + */ +if (!spa_writeable(spa)) + return; #endif - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - - /* - * First, close and free any existing spare vdevs. - */ - for (i = 0; i < spa->spa_spares.sav_count; i++) { - vd = spa->spa_spares.sav_vdevs[i]; - - /* Undo the call to spa_activate() below */ - if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, - B_FALSE)) != NULL && tvd->vdev_isspare) - spa_spare_remove(tvd); - vdev_close(vd); - vdev_free(vd); - } - - if (spa->spa_spares.sav_vdevs) - kmem_free(spa->spa_spares.sav_vdevs, - spa->spa_spares.sav_count * sizeof (void *)); +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - if (spa->spa_spares.sav_config == NULL) - nspares = 0; - else - VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); +/* + * First, close and free any existing spare vdevs. 
+ */ +for (i = 0; i < spa->spa_spares.sav_count; i++) { + vd = spa->spa_spares.sav_vdevs[i]; - spa->spa_spares.sav_count = (int)nspares; - spa->spa_spares.sav_vdevs = NULL; + /* Undo the call to spa_activate() below */ + if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, + B_FALSE)) != NULL && tvd->vdev_isspare) + spa_spare_remove(tvd); + vdev_close(vd); + vdev_free(vd); +} - if (nspares == 0) - return; +if (spa->spa_spares.sav_vdevs) + kmem_free(spa->spa_spares.sav_vdevs, + spa->spa_spares.sav_count * sizeof (void *)); - /* - * Construct the array of vdevs, opening them to get status in the - * process. For each spare, there is potentially two different vdev_t - * structures associated with it: one in the list of spares (used only - * for basic validation purposes) and one in the active vdev - * configuration (if it's spared in). During this phase we open and - * validate each vdev on the spare list. If the vdev also exists in the - * active configuration, then we also mark this vdev as an active spare. - */ - spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), - KM_SLEEP); - for (i = 0; i < spa->spa_spares.sav_count; i++) { - VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, - VDEV_ALLOC_SPARE) == 0); - ASSERT(vd != NULL); +if (spa->spa_spares.sav_config == NULL) + nspares = 0; +else + VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); - spa->spa_spares.sav_vdevs[i] = vd; +spa->spa_spares.sav_count = (int)nspares; +spa->spa_spares.sav_vdevs = NULL; - if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, - B_FALSE)) != NULL) { - if (!tvd->vdev_isspare) - spa_spare_add(tvd); +if (nspares == 0) + return; - /* - * We only mark the spare active if we were successfully - * able to load the vdev. Otherwise, importing a pool - * with a bad active spare would result in strange - * behavior, because multiple pool would think the spare - * is actively in use. 
- * - * There is a vulnerability here to an equally bizarre - * circumstance, where a dead active spare is later - * brought back to life (onlined or otherwise). Given - * the rarity of this scenario, and the extra complexity - * it adds, we ignore the possibility. - */ - if (!vdev_is_dead(tvd)) - spa_spare_activate(tvd); - } +/* + * Construct the array of vdevs, opening them to get status in the + * process. For each spare, there is potentially two different vdev_t + * structures associated with it: one in the list of spares (used only + * for basic validation purposes) and one in the active vdev + * configuration (if it's spared in). During this phase we open and + * validate each vdev on the spare list. If the vdev also exists in the + * active configuration, then we also mark this vdev as an active spare. + */ +spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), + KM_SLEEP); +for (i = 0; i < spa->spa_spares.sav_count; i++) { + VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, + VDEV_ALLOC_SPARE) == 0); + ASSERT(vd != NULL); - vd->vdev_top = vd; - vd->vdev_aux = &spa->spa_spares; + spa->spa_spares.sav_vdevs[i] = vd; - if (vdev_open(vd) != 0) - continue; + if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, + B_FALSE)) != NULL) { + if (!tvd->vdev_isspare) + spa_spare_add(tvd); - if (vdev_validate_aux(vd) == 0) - spa_spare_add(vd); + /* + * We only mark the spare active if we were successfully + * able to load the vdev. Otherwise, importing a pool + * with a bad active spare would result in strange + * behavior, because multiple pool would think the spare + * is actively in use. + * + * There is a vulnerability here to an equally bizarre + * circumstance, where a dead active spare is later + * brought back to life (onlined or otherwise). Given + * the rarity of this scenario, and the extra complexity + * it adds, we ignore the possibility. 
+ */ + if (!vdev_is_dead(tvd)) + spa_spare_activate(tvd); } - /* - * Recompute the stashed list of spares, with status information - * this time. - */ - VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, - DATA_TYPE_NVLIST_ARRAY) == 0); + vd->vdev_top = vd; + vd->vdev_aux = &spa->spa_spares; - spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), - KM_SLEEP); - for (i = 0; i < spa->spa_spares.sav_count; i++) - spares[i] = vdev_config_generate(spa, - spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); - VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); - for (i = 0; i < spa->spa_spares.sav_count; i++) - nvlist_free(spares[i]); - kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); + if (vdev_open(vd) != 0) + continue; + + if (vdev_validate_aux(vd) == 0) + spa_spare_add(vd); } /* - * Load (or re-load) the current list of vdevs describing the active l2cache for - * this pool. When this is called, we have some form of basic information in - * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and - * then re-generate a more complete list including status information. - * Devices which are already active have their details maintained, and are - * not re-opened. + * Recompute the stashed list of spares, with status information + * this time. 
*/ +VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, + DATA_TYPE_NVLIST_ARRAY) == 0); + +spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), + KM_SLEEP); +for (i = 0; i < spa->spa_spares.sav_count; i++) + spares[i] = vdev_config_generate(spa, + spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); +VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); +for (i = 0; i < spa->spa_spares.sav_count; i++) + nvlist_free(spares[i]); +kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); +} + +/* +* Load (or re-load) the current list of vdevs describing the active l2cache for +* this pool. When this is called, we have some form of basic information in +* 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and +* then re-generate a more complete list including status information. +* Devices which are already active have their details maintained, and are +* not re-opened. +*/ void spa_load_l2cache(spa_t *spa) { - nvlist_t **l2cache = NULL; - uint_t nl2cache; - int i, j, oldnvdevs; - uint64_t guid; - vdev_t *vd, **oldvdevs, **newvdevs; - spa_aux_vdev_t *sav = &spa->spa_l2cache; +nvlist_t **l2cache = NULL; +uint_t nl2cache; +int i, j, oldnvdevs; +uint64_t guid; +vdev_t *vd, **oldvdevs, **newvdevs; +spa_aux_vdev_t *sav = &spa->spa_l2cache; #ifndef _KERNEL - /* - * zdb opens both the current state of the pool and the - * checkpointed state (if present), with a different spa_t. - * - * As L2 caches are part of the ARC which is shared among open - * pools, we skip loading them when we load the checkpointed - * state of the pool. - */ - if (!spa_writeable(spa)) - return; +/* + * zdb opens both the current state of the pool and the + * checkpointed state (if present), with a different spa_t. + * + * As L2 caches are part of the ARC which is shared among open + * pools, we skip loading them when we load the checkpointed + * state of the pool. 
+ */ +if (!spa_writeable(spa)) + return; #endif - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - - oldvdevs = sav->sav_vdevs; - oldnvdevs = sav->sav_count; - sav->sav_vdevs = NULL; - sav->sav_count = 0; - - if (sav->sav_config == NULL) { - nl2cache = 0; - newvdevs = NULL; - goto out; - } +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, - ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); +oldvdevs = sav->sav_vdevs; +oldnvdevs = sav->sav_count; +sav->sav_vdevs = NULL; +sav->sav_count = 0; - /* - * Process new nvlist of vdevs. - */ - for (i = 0; i < nl2cache; i++) { - VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, - &guid) == 0); +if (sav->sav_config == NULL) { + nl2cache = 0; + newvdevs = NULL; + goto out; +} - newvdevs[i] = NULL; - for (j = 0; j < oldnvdevs; j++) { - vd = oldvdevs[j]; - if (vd != NULL && guid == vd->vdev_guid) { - /* - * Retain previous vdev for add/remove ops. - */ - newvdevs[i] = vd; - oldvdevs[j] = NULL; - break; - } - } +VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); +newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); - if (newvdevs[i] == NULL) { +/* + * Process new nvlist of vdevs. + */ +for (i = 0; i < nl2cache; i++) { + VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, + &guid) == 0); + + newvdevs[i] = NULL; + for (j = 0; j < oldnvdevs; j++) { + vd = oldvdevs[j]; + if (vd != NULL && guid == vd->vdev_guid) { /* - * Create new vdev + * Retain previous vdev for add/remove ops. */ - VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, - VDEV_ALLOC_L2CACHE) == 0); - ASSERT(vd != NULL); newvdevs[i] = vd; + oldvdevs[j] = NULL; + break; + } + } - /* - * Commit this vdev as an l2cache device, - * even if it fails to open. 
- */ - spa_l2cache_add(vd); + if (newvdevs[i] == NULL) { + /* + * Create new vdev + */ + VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, + VDEV_ALLOC_L2CACHE) == 0); + ASSERT(vd != NULL); + newvdevs[i] = vd; - vd->vdev_top = vd; - vd->vdev_aux = sav; + /* + * Commit this vdev as an l2cache device, + * even if it fails to open. + */ + spa_l2cache_add(vd); - spa_l2cache_activate(vd); + vd->vdev_top = vd; + vd->vdev_aux = sav; - if (vdev_open(vd) != 0) - continue; + spa_l2cache_activate(vd); - (void) vdev_validate_aux(vd); + if (vdev_open(vd) != 0) + continue; - if (!vdev_is_dead(vd)) - l2arc_add_vdev(spa, vd); - } + (void) vdev_validate_aux(vd); + + if (!vdev_is_dead(vd)) + l2arc_add_vdev(spa, vd); } +} - sav->sav_vdevs = newvdevs; - sav->sav_count = (int)nl2cache; +sav->sav_vdevs = newvdevs; +sav->sav_count = (int)nl2cache; - /* - * Recompute the stashed list of l2cache devices, with status - * information this time. - */ - VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, - DATA_TYPE_NVLIST_ARRAY) == 0); +/* + * Recompute the stashed list of l2cache devices, with status + * information this time. 
+ */ +VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, + DATA_TYPE_NVLIST_ARRAY) == 0); - if (sav->sav_count > 0) - l2cache = kmem_alloc(sav->sav_count * sizeof (void *), - KM_SLEEP); - for (i = 0; i < sav->sav_count; i++) - l2cache[i] = vdev_config_generate(spa, - sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); - VERIFY(nvlist_add_nvlist_array(sav->sav_config, - ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); +if (sav->sav_count > 0) + l2cache = kmem_alloc(sav->sav_count * sizeof (void *), + KM_SLEEP); +for (i = 0; i < sav->sav_count; i++) + l2cache[i] = vdev_config_generate(spa, + sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); +VERIFY(nvlist_add_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); out: - /* - * Purge vdevs that were dropped - */ - for (i = 0; i < oldnvdevs; i++) { - uint64_t pool; - - vd = oldvdevs[i]; - if (vd != NULL) { - ASSERT(vd->vdev_isl2cache); - - if (spa_l2cache_exists(vd->vdev_guid, &pool) && - pool != 0ULL && l2arc_vdev_present(vd)) - l2arc_remove_vdev(vd); - vdev_clear_stats(vd); - vdev_free(vd); - } +/* + * Purge vdevs that were dropped + */ +for (i = 0; i < oldnvdevs; i++) { + uint64_t pool; + + vd = oldvdevs[i]; + if (vd != NULL) { + ASSERT(vd->vdev_isl2cache); + + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); + vdev_clear_stats(vd); + vdev_free(vd); } +} - if (oldvdevs) - kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); +if (oldvdevs) + kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); - for (i = 0; i < sav->sav_count; i++) - nvlist_free(l2cache[i]); - if (sav->sav_count) - kmem_free(l2cache, sav->sav_count * sizeof (void *)); +for (i = 0; i < sav->sav_count; i++) + nvlist_free(l2cache[i]); +if (sav->sav_count) + kmem_free(l2cache, sav->sav_count * sizeof (void *)); } static int load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { - dmu_buf_t *db; - char *packed = NULL; - size_t nvsize = 0; - int error; - 
*value = NULL; +dmu_buf_t *db; +char *packed = NULL; +size_t nvsize = 0; +int error; +*value = NULL; - error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); - if (error) - return (error); +error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); +if (error) + return (error); - nvsize = *(uint64_t *)db->db_data; - dmu_buf_rele(db, FTAG); +nvsize = *(uint64_t *)db->db_data; +dmu_buf_rele(db, FTAG); - packed = vmem_alloc(nvsize, KM_SLEEP); - error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, - DMU_READ_PREFETCH); - if (error == 0) - error = nvlist_unpack(packed, nvsize, value, 0); - vmem_free(packed, nvsize); +packed = vmem_alloc(nvsize, KM_SLEEP); +error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, + DMU_READ_PREFETCH); +if (error == 0) + error = nvlist_unpack(packed, nvsize, value, 0); +vmem_free(packed, nvsize); - return (error); +return (error); } /* - * Concrete top-level vdevs that are not missing and are not logs. At every - * spa_sync we write new uberblocks to at least SPA_SYNC_MIN_VDEVS core tvds. - */ +* Concrete top-level vdevs that are not missing and are not logs. At every +* spa_sync we write new uberblocks to at least SPA_SYNC_MIN_VDEVS core tvds. +*/ static uint64_t spa_healthy_core_tvds(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; - uint64_t tvds = 0; +vdev_t *rvd = spa->spa_root_vdev; +uint64_t tvds = 0; - for (uint64_t i = 0; i < rvd->vdev_children; i++) { - vdev_t *vd = rvd->vdev_child[i]; - if (vd->vdev_islog) - continue; - if (vdev_is_concrete(vd) && !vdev_is_dead(vd)) - tvds++; - } +for (uint64_t i = 0; i < rvd->vdev_children; i++) { + vdev_t *vd = rvd->vdev_child[i]; + if (vd->vdev_islog) + continue; + if (vdev_is_concrete(vd) && !vdev_is_dead(vd)) + tvds++; +} - return (tvds); +return (tvds); } /* - * Checks to see if the given vdev could not be opened, in which case we post a - * sysevent to notify the autoreplace code that the device has been removed. 
- */ +* Checks to see if the given vdev could not be opened, in which case we post a +* sysevent to notify the autoreplace code that the device has been removed. +*/ static void spa_check_removed(vdev_t *vd) { - for (uint64_t c = 0; c < vd->vdev_children; c++) - spa_check_removed(vd->vdev_child[c]); +for (uint64_t c = 0; c < vd->vdev_children; c++) + spa_check_removed(vd->vdev_child[c]); - if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && - vdev_is_concrete(vd)) { - zfs_post_autoreplace(vd->vdev_spa, vd); - spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK); - } +if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && + vdev_is_concrete(vd)) { + zfs_post_autoreplace(vd->vdev_spa, vd); + spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK); +} } static int spa_check_for_missing_logs(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; +vdev_t *rvd = spa->spa_root_vdev; - /* - * If we're doing a normal import, then build up any additional - * diagnostic information about missing log devices. - * We'll pass this up to the user for further processing. - */ - if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { - nvlist_t **child, *nv; - uint64_t idx = 0; +/* + * If we're doing a normal import, then build up any additional + * diagnostic information about missing log devices. + * We'll pass this up to the user for further processing. 
+ */ +if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { + nvlist_t **child, *nv; + uint64_t idx = 0; - child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *), - KM_SLEEP); - VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *), + KM_SLEEP); + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); - for (uint64_t c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; + for (uint64_t c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; - /* - * We consider a device as missing only if it failed - * to open (i.e. offline or faulted is not considered - * as missing). - */ - if (tvd->vdev_islog && - tvd->vdev_state == VDEV_STATE_CANT_OPEN) { - child[idx++] = vdev_config_generate(spa, tvd, - B_FALSE, VDEV_CONFIG_MISSING); - } + /* + * We consider a device as missing only if it failed + * to open (i.e. offline or faulted is not considered + * as missing). + */ + if (tvd->vdev_islog && + tvd->vdev_state == VDEV_STATE_CANT_OPEN) { + child[idx++] = vdev_config_generate(spa, tvd, + B_FALSE, VDEV_CONFIG_MISSING); } + } - if (idx > 0) { - fnvlist_add_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, child, idx); - fnvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_MISSING_DEVICES, nv); + if (idx > 0) { + fnvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, idx); + fnvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_MISSING_DEVICES, nv); - for (uint64_t i = 0; i < idx; i++) - nvlist_free(child[i]); - } - nvlist_free(nv); - kmem_free(child, rvd->vdev_children * sizeof (char **)); + for (uint64_t i = 0; i < idx; i++) + nvlist_free(child[i]); + } + nvlist_free(nv); + kmem_free(child, rvd->vdev_children * sizeof (char **)); + + if (idx > 0) { + spa_load_failed(spa, "some log devices are missing"); + vdev_dbgmsg_print_tree(rvd, 2); + return (SET_ERROR(ENXIO)); + } +} else { + for (uint64_t c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = 
rvd->vdev_child[c]; - if (idx > 0) { - spa_load_failed(spa, "some log devices are missing"); + if (tvd->vdev_islog && + tvd->vdev_state == VDEV_STATE_CANT_OPEN) { + spa_set_log_state(spa, SPA_LOG_CLEAR); + spa_load_note(spa, "some log devices are " + "missing, ZIL is dropped."); vdev_dbgmsg_print_tree(rvd, 2); - return (SET_ERROR(ENXIO)); - } - } else { - for (uint64_t c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - - if (tvd->vdev_islog && - tvd->vdev_state == VDEV_STATE_CANT_OPEN) { - spa_set_log_state(spa, SPA_LOG_CLEAR); - spa_load_note(spa, "some log devices are " - "missing, ZIL is dropped."); - vdev_dbgmsg_print_tree(rvd, 2); - break; - } + break; } } +} - return (0); +return (0); } /* - * Check for missing log devices - */ +* Check for missing log devices +*/ static boolean_t spa_check_logs(spa_t *spa) { - boolean_t rv = B_FALSE; - dsl_pool_t *dp = spa_get_dsl(spa); +boolean_t rv = B_FALSE; +dsl_pool_t *dp = spa_get_dsl(spa); - switch (spa->spa_log_state) { - default: - break; - case SPA_LOG_MISSING: - /* need to recheck in case slog has been restored */ - case SPA_LOG_UNKNOWN: - rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, - zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0); - if (rv) - spa_set_log_state(spa, SPA_LOG_MISSING); - break; - } - return (rv); +switch (spa->spa_log_state) { +default: + break; +case SPA_LOG_MISSING: + /* need to recheck in case slog has been restored */ +case SPA_LOG_UNKNOWN: + rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0); + if (rv) + spa_set_log_state(spa, SPA_LOG_MISSING); + break; +} +return (rv); } static boolean_t spa_passivate_log(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; - boolean_t slog_found = B_FALSE; +vdev_t *rvd = spa->spa_root_vdev; +boolean_t slog_found = B_FALSE; - ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); +ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); - if (!spa_has_slogs(spa)) - return (B_FALSE); 
+if (!spa_has_slogs(spa)) + return (B_FALSE); - for (int c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - metaslab_group_t *mg = tvd->vdev_mg; +for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; - if (tvd->vdev_islog) { - metaslab_group_passivate(mg); - slog_found = B_TRUE; - } + if (tvd->vdev_islog) { + metaslab_group_passivate(mg); + slog_found = B_TRUE; } +} - return (slog_found); +return (slog_found); } static void spa_activate_log(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; +vdev_t *rvd = spa->spa_root_vdev; - ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); +ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); - for (int c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - metaslab_group_t *mg = tvd->vdev_mg; +for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; - if (tvd->vdev_islog) - metaslab_group_activate(mg); - } + if (tvd->vdev_islog) + metaslab_group_activate(mg); +} } int spa_reset_logs(spa_t *spa) { - int error; +int error; - error = dmu_objset_find(spa_name(spa), zil_reset, - NULL, DS_FIND_CHILDREN); - if (error == 0) { - /* - * We successfully offlined the log device, sync out the - * current txg so that the "stubby" block can be removed - * by zil_sync(). - */ - txg_wait_synced(spa->spa_dsl_pool, 0); - } - return (error); +error = dmu_objset_find(spa_name(spa), zil_reset, + NULL, DS_FIND_CHILDREN); +if (error == 0) { + /* + * We successfully offlined the log device, sync out the + * current txg so that the "stubby" block can be removed + * by zil_sync(). 
+ */ + txg_wait_synced(spa->spa_dsl_pool, 0); +} +return (error); } static void spa_aux_check_removed(spa_aux_vdev_t *sav) { - for (int i = 0; i < sav->sav_count; i++) - spa_check_removed(sav->sav_vdevs[i]); +for (int i = 0; i < sav->sav_count; i++) + spa_check_removed(sav->sav_vdevs[i]); } void spa_claim_notify(zio_t *zio) { - spa_t *spa = zio->io_spa; +spa_t *spa = zio->io_spa; - if (zio->io_error) - return; +if (zio->io_error) + return; - mutex_enter(&spa->spa_props_lock); /* any mutex will do */ - if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) - spa->spa_claim_max_txg = zio->io_bp->blk_birth; - mutex_exit(&spa->spa_props_lock); +mutex_enter(&spa->spa_props_lock); /* any mutex will do */ +if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) + spa->spa_claim_max_txg = zio->io_bp->blk_birth; +mutex_exit(&spa->spa_props_lock); } typedef struct spa_load_error { - uint64_t sle_meta_count; - uint64_t sle_data_count; +uint64_t sle_meta_count; +uint64_t sle_data_count; } spa_load_error_t; static void spa_load_verify_done(zio_t *zio) { - blkptr_t *bp = zio->io_bp; - spa_load_error_t *sle = zio->io_private; - dmu_object_type_t type = BP_GET_TYPE(bp); - int error = zio->io_error; - spa_t *spa = zio->io_spa; - - abd_free(zio->io_abd); - if (error) { - if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && - type != DMU_OT_INTENT_LOG) - atomic_inc_64(&sle->sle_meta_count); - else - atomic_inc_64(&sle->sle_data_count); - } +blkptr_t *bp = zio->io_bp; +spa_load_error_t *sle = zio->io_private; +dmu_object_type_t type = BP_GET_TYPE(bp); +int error = zio->io_error; +spa_t *spa = zio->io_spa; + +abd_free(zio->io_abd); +if (error) { + if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && + type != DMU_OT_INTENT_LOG) + atomic_inc_64(&sle->sle_meta_count); + else + atomic_inc_64(&sle->sle_data_count); +} - mutex_enter(&spa->spa_scrub_lock); - spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); - cv_broadcast(&spa->spa_scrub_io_cv); - mutex_exit(&spa->spa_scrub_lock); 
+mutex_enter(&spa->spa_scrub_lock); +spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); +cv_broadcast(&spa->spa_scrub_io_cv); +mutex_exit(&spa->spa_scrub_lock); } /* - * Maximum number of inflight bytes is the log2 fraction of the arc size. - * By default, we set it to 1/16th of the arc. - */ +* Maximum number of inflight bytes is the log2 fraction of the arc size. +* By default, we set it to 1/16th of the arc. +*/ int spa_load_verify_shift = 4; int spa_load_verify_metadata = B_TRUE; int spa_load_verify_data = B_TRUE; @@ -2123,4100 +2132,4129 @@ int spa_load_verify_data = B_TRUE; /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) +const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { - if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) - return (0); - /* - * Note: normally this routine will not be called if - * spa_load_verify_metadata is not set. However, it may be useful - * to manually set the flag after the traversal has begun. - */ - if (!spa_load_verify_metadata) - return (0); - if (!BP_IS_METADATA(bp) && !spa_load_verify_data) - return (0); - - uint64_t maxinflight_bytes = - arc_target_bytes() >> spa_load_verify_shift; - zio_t *rio = arg; - size_t size = BP_GET_PSIZE(bp); - - mutex_enter(&spa->spa_scrub_lock); - while (spa->spa_load_verify_bytes >= maxinflight_bytes) - cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); - spa->spa_load_verify_bytes += size; - mutex_exit(&spa->spa_scrub_lock); - - zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, - spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | - ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); +if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + return (0); +/* + * Note: normally this routine will not be called if + * spa_load_verify_metadata is not set. 
However, it may be useful + * to manually set the flag after the traversal has begun. + */ +if (!spa_load_verify_metadata) + return (0); +if (!BP_IS_METADATA(bp) && !spa_load_verify_data) return (0); + +uint64_t maxinflight_bytes = + arc_target_bytes() >> spa_load_verify_shift; +zio_t *rio = arg; +size_t size = BP_GET_PSIZE(bp); + +mutex_enter(&spa->spa_scrub_lock); +while (spa->spa_load_verify_bytes >= maxinflight_bytes) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); +spa->spa_load_verify_bytes += size; +mutex_exit(&spa->spa_scrub_lock); + +zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, + spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); +return (0); } /* ARGSUSED */ int verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { - if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) - return (SET_ERROR(ENAMETOOLONG)); +if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); - return (0); +return (0); } static int spa_load_verify(spa_t *spa) { - zio_t *rio; - spa_load_error_t sle = { 0 }; - zpool_load_policy_t policy; - boolean_t verify_ok = B_FALSE; - int error = 0; +zio_t *rio; +spa_load_error_t sle = { 0 }; +zpool_load_policy_t policy; +boolean_t verify_ok = B_FALSE; +int error = 0; - zpool_get_load_policy(spa->spa_config, &policy); - - if (policy.zlp_rewind & ZPOOL_NEVER_REWIND) - return (0); +zpool_get_load_policy(spa->spa_config, &policy); - dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); - error = dmu_objset_find_dp(spa->spa_dsl_pool, - spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, - DS_FIND_CHILDREN); - dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); - if (error != 0) - return (error); +if (policy.zlp_rewind & ZPOOL_NEVER_REWIND) + return (0); - rio = zio_root(spa, NULL, &sle, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 
+dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); +error = dmu_objset_find_dp(spa->spa_dsl_pool, + spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, + DS_FIND_CHILDREN); +dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); +if (error != 0) + return (error); - if (spa_load_verify_metadata) { - if (spa->spa_extreme_rewind) { - spa_load_note(spa, "performing a complete scan of the " - "pool since extreme rewind is on. This may take " - "a very long time.\n (spa_load_verify_data=%u, " - "spa_load_verify_metadata=%u)", - spa_load_verify_data, spa_load_verify_metadata); - } +rio = zio_root(spa, NULL, &sle, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); - error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | - TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); +if (spa_load_verify_metadata) { + if (spa->spa_extreme_rewind) { + spa_load_note(spa, "performing a complete scan of the " + "pool since extreme rewind is on. This may take " + "a very long time.\n (spa_load_verify_data=%u, " + "spa_load_verify_metadata=%u)", + spa_load_verify_data, spa_load_verify_metadata); } - (void) zio_wait(rio); - ASSERT0(spa->spa_load_verify_bytes); + error = traverse_pool(spa, spa->spa_verify_min_txg, + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); +} + +(void) zio_wait(rio); +ASSERT0(spa->spa_load_verify_bytes); - spa->spa_load_meta_errors = sle.sle_meta_count; - spa->spa_load_data_errors = sle.sle_data_count; +spa->spa_load_meta_errors = sle.sle_meta_count; +spa->spa_load_data_errors = sle.sle_data_count; - if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { - spa_load_note(spa, "spa_load_verify found %llu metadata errors " - "and %llu data errors", (u_longlong_t)sle.sle_meta_count, - (u_longlong_t)sle.sle_data_count); - } +if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { + spa_load_note(spa, "spa_load_verify found %llu metadata errors " + "and %llu data errors", 
(u_longlong_t)sle.sle_meta_count, + (u_longlong_t)sle.sle_data_count); +} - if (spa_load_verify_dryrun || - (!error && sle.sle_meta_count <= policy.zlp_maxmeta && - sle.sle_data_count <= policy.zlp_maxdata)) { - int64_t loss = 0; +if (spa_load_verify_dryrun || + (!error && sle.sle_meta_count <= policy.zlp_maxmeta && + sle.sle_data_count <= policy.zlp_maxdata)) { + int64_t loss = 0; - verify_ok = B_TRUE; - spa->spa_load_txg = spa->spa_uberblock.ub_txg; - spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; + verify_ok = B_TRUE; + spa->spa_load_txg = spa->spa_uberblock.ub_txg; + spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; - loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; - VERIFY(nvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); - VERIFY(nvlist_add_int64(spa->spa_load_info, - ZPOOL_CONFIG_REWIND_TIME, loss) == 0); - VERIFY(nvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); - } else { - spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; - } + loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; + VERIFY(nvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); + VERIFY(nvlist_add_int64(spa->spa_load_info, + ZPOOL_CONFIG_REWIND_TIME, loss) == 0); + VERIFY(nvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); +} else { + spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; +} - if (spa_load_verify_dryrun) - return (0); +if (spa_load_verify_dryrun) + return (0); - if (error) { - if (error != ENXIO && error != EIO) - error = SET_ERROR(EIO); - return (error); - } +if (error) { + if (error != ENXIO && error != EIO) + error = SET_ERROR(EIO); + return (error); +} - return (verify_ok ? 0 : EIO); +return (verify_ok ? 0 : EIO); } /* - * Find a value in the pool props object. - */ +* Find a value in the pool props object. 
+*/ static void spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) { - (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, - zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); +(void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, + zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); } /* - * Find a value in the pool directory object. - */ +* Find a value in the pool directory object. +*/ static int spa_dir_prop(spa_t *spa, const char *name, uint64_t *val, boolean_t log_enoent) { - int error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - name, sizeof (uint64_t), 1, val); +int error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + name, sizeof (uint64_t), 1, val); - if (error != 0 && (error != ENOENT || log_enoent)) { - spa_load_failed(spa, "couldn't get '%s' value in MOS directory " - "[error=%d]", name, error); - } +if (error != 0 && (error != ENOENT || log_enoent)) { + spa_load_failed(spa, "couldn't get '%s' value in MOS directory " + "[error=%d]", name, error); +} - return (error); +return (error); } static int spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) { - vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); - return (SET_ERROR(err)); +vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); +return (SET_ERROR(err)); } static void spa_spawn_aux_threads(spa_t *spa) { - ASSERT(spa_writeable(spa)); +ASSERT(spa_writeable(spa)); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa_start_indirect_condensing_thread(spa); +spa_start_raidz_expansion_thread(spa); +spa_start_indirect_condensing_thread(spa); - ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); - spa->spa_checkpoint_discard_zthr = - zthr_create(spa_checkpoint_discard_thread_check, - spa_checkpoint_discard_thread, spa); +ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); +spa->spa_checkpoint_discard_zthr = + zthr_create(spa_checkpoint_discard_thread_check, + 
spa_checkpoint_discard_thread, spa); } /* - * Fix up config after a partly-completed split. This is done with the - * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off - * pool have that entry in their config, but only the splitting one contains - * a list of all the guids of the vdevs that are being split off. - * - * This function determines what to do with that list: either rejoin - * all the disks to the pool, or complete the splitting process. To attempt - * the rejoin, each disk that is offlined is marked online again, and - * we do a reopen() call. If the vdev label for every disk that was - * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) - * then we call vdev_split() on each disk, and complete the split. - * - * Otherwise we leave the config alone, with all the vdevs in place in - * the original pool. - */ +* Fix up config after a partly-completed split. This is done with the +* ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off +* pool have that entry in their config, but only the splitting one contains +* a list of all the guids of the vdevs that are being split off. +* +* This function determines what to do with that list: either rejoin +* all the disks to the pool, or complete the splitting process. To attempt +* the rejoin, each disk that is offlined is marked online again, and +* we do a reopen() call. If the vdev label for every disk that was +* marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) +* then we call vdev_split() on each disk, and complete the split. +* +* Otherwise we leave the config alone, with all the vdevs in place in +* the original pool. 
+*/ static void spa_try_repair(spa_t *spa, nvlist_t *config) { - uint_t extracted; - uint64_t *glist; - uint_t i, gcount; - nvlist_t *nvl; - vdev_t **vd; - boolean_t attempt_reopen; +uint_t extracted; +uint64_t *glist; +uint_t i, gcount; +nvlist_t *nvl; +vdev_t **vd; +boolean_t attempt_reopen; - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) - return; +if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) + return; - /* check that the config is complete */ - if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, - &glist, &gcount) != 0) - return; +/* check that the config is complete */ +if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, + &glist, &gcount) != 0) + return; - vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); +vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); - /* attempt to online all the vdevs & validate */ - attempt_reopen = B_TRUE; - for (i = 0; i < gcount; i++) { - if (glist[i] == 0) /* vdev is hole */ - continue; +/* attempt to online all the vdevs & validate */ +attempt_reopen = B_TRUE; +for (i = 0; i < gcount; i++) { + if (glist[i] == 0) /* vdev is hole */ + continue; - vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); - if (vd[i] == NULL) { - /* - * Don't bother attempting to reopen the disks; - * just do the split. - */ - attempt_reopen = B_FALSE; - } else { - /* attempt to re-online it */ - vd[i]->vdev_offline = B_FALSE; - } + vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); + if (vd[i] == NULL) { + /* + * Don't bother attempting to reopen the disks; + * just do the split. 
+ */ + attempt_reopen = B_FALSE; + } else { + /* attempt to re-online it */ + vd[i]->vdev_offline = B_FALSE; } +} - if (attempt_reopen) { - vdev_reopen(spa->spa_root_vdev); +if (attempt_reopen) { + vdev_reopen(spa->spa_root_vdev); - /* check each device to see what state it's in */ - for (extracted = 0, i = 0; i < gcount; i++) { - if (vd[i] != NULL && - vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) - break; - ++extracted; - } + /* check each device to see what state it's in */ + for (extracted = 0, i = 0; i < gcount; i++) { + if (vd[i] != NULL && + vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) + break; + ++extracted; } +} - /* - * If every disk has been moved to the new pool, or if we never - * even attempted to look at them, then we split them off for - * good. - */ - if (!attempt_reopen || gcount == extracted) { - for (i = 0; i < gcount; i++) - if (vd[i] != NULL) - vdev_split(vd[i]); - vdev_reopen(spa->spa_root_vdev); - } +/* + * If every disk has been moved to the new pool, or if we never + * even attempted to look at them, then we split them off for + * good. 
+ */ +if (!attempt_reopen || gcount == extracted) { + for (i = 0; i < gcount; i++) + if (vd[i] != NULL) + vdev_split(vd[i]); + vdev_reopen(spa->spa_root_vdev); +} - kmem_free(vd, gcount * sizeof (vdev_t *)); +kmem_free(vd, gcount * sizeof (vdev_t *)); } static int spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) { - char *ereport = FM_EREPORT_ZFS_POOL; - int error; +char *ereport = FM_EREPORT_ZFS_POOL; +int error; - spa->spa_load_state = state; - (void) spa_import_progress_set_state(spa_guid(spa), - spa_load_state(spa)); +spa->spa_load_state = state; +(void) spa_import_progress_set_state(spa_guid(spa), + spa_load_state(spa)); - gethrestime(&spa->spa_loaded_ts); - error = spa_load_impl(spa, type, &ereport); +gethrestime(&spa->spa_loaded_ts); +error = spa_load_impl(spa, type, &ereport); - /* - * Don't count references from objsets that are already closed - * and are making their way through the eviction process. - */ - spa_evicting_os_wait(spa); - spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); - if (error) { - if (error != EEXIST) { - spa->spa_loaded_ts.tv_sec = 0; - spa->spa_loaded_ts.tv_nsec = 0; - } - if (error != EBADF) { - zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0); - } +/* + * Don't count references from objsets that are already closed + * and are making their way through the eviction process. + */ +spa_evicting_os_wait(spa); +spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); +if (error) { + if (error != EEXIST) { + spa->spa_loaded_ts.tv_sec = 0; + spa->spa_loaded_ts.tv_nsec = 0; } - spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; - spa->spa_ena = 0; + if (error != EBADF) { + zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0); + } +} +spa->spa_load_state = error ? 
SPA_LOAD_ERROR : SPA_LOAD_NONE; +spa->spa_ena = 0; - (void) spa_import_progress_set_state(spa_guid(spa), - spa_load_state(spa)); +(void) spa_import_progress_set_state(spa_guid(spa), + spa_load_state(spa)); - return (error); +return (error); } #ifdef ZFS_DEBUG /* - * Count the number of per-vdev ZAPs associated with all of the vdevs in the - * vdev tree rooted in the given vd, and ensure that each ZAP is present in the - * spa's per-vdev ZAP list. - */ +* Count the number of per-vdev ZAPs associated with all of the vdevs in the +* vdev tree rooted in the given vd, and ensure that each ZAP is present in the +* spa's per-vdev ZAP list. +*/ static uint64_t vdev_count_verify_zaps(vdev_t *vd) { - spa_t *spa = vd->vdev_spa; - uint64_t total = 0; +spa_t *spa = vd->vdev_spa; +uint64_t total = 0; - if (vd->vdev_top_zap != 0) { - total++; - ASSERT0(zap_lookup_int(spa->spa_meta_objset, - spa->spa_all_vdev_zaps, vd->vdev_top_zap)); - } - if (vd->vdev_leaf_zap != 0) { - total++; - ASSERT0(zap_lookup_int(spa->spa_meta_objset, - spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); - } +if (vd->vdev_top_zap != 0) { + total++; + ASSERT0(zap_lookup_int(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, vd->vdev_top_zap)); +} +if (vd->vdev_leaf_zap != 0) { + total++; + ASSERT0(zap_lookup_int(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); +} - for (uint64_t i = 0; i < vd->vdev_children; i++) { - total += vdev_count_verify_zaps(vd->vdev_child[i]); - } +for (uint64_t i = 0; i < vd->vdev_children; i++) { + total += vdev_count_verify_zaps(vd->vdev_child[i]); +} - return (total); +return (total); } #endif /* - * Determine whether the activity check is required. - */ +* Determine whether the activity check is required. 
+*/ static boolean_t spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label, - nvlist_t *config) +nvlist_t *config) { - uint64_t state = 0; - uint64_t hostid = 0; - uint64_t tryconfig_txg = 0; - uint64_t tryconfig_timestamp = 0; - uint16_t tryconfig_mmp_seq = 0; - nvlist_t *nvinfo; +uint64_t state = 0; +uint64_t hostid = 0; +uint64_t tryconfig_txg = 0; +uint64_t tryconfig_timestamp = 0; +uint16_t tryconfig_mmp_seq = 0; +nvlist_t *nvinfo; - if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { - nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); - (void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG, - &tryconfig_txg); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP, - &tryconfig_timestamp); - (void) nvlist_lookup_uint16(nvinfo, ZPOOL_CONFIG_MMP_SEQ, - &tryconfig_mmp_seq); - } +if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + (void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG, + &tryconfig_txg); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP, + &tryconfig_timestamp); + (void) nvlist_lookup_uint16(nvinfo, ZPOOL_CONFIG_MMP_SEQ, + &tryconfig_mmp_seq); +} - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state); +(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state); - /* - * Disable the MMP activity check - This is used by zdb which - * is intended to be used on potentially active pools. - */ - if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) - return (B_FALSE); +/* + * Disable the MMP activity check - This is used by zdb which + * is intended to be used on potentially active pools. + */ +if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) + return (B_FALSE); - /* - * Skip the activity check when the MMP feature is disabled. - */ - if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0) - return (B_FALSE); +/* + * Skip the activity check when the MMP feature is disabled. 
+ */ +if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0) + return (B_FALSE); - /* - * If the tryconfig_ values are nonzero, they are the results of an - * earlier tryimport. If they all match the uberblock we just found, - * then the pool has not changed and we return false so we do not test - * a second time. - */ - if (tryconfig_txg && tryconfig_txg == ub->ub_txg && - tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp && - tryconfig_mmp_seq && tryconfig_mmp_seq == - (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) - return (B_FALSE); +/* + * If the tryconfig_ values are nonzero, they are the results of an + * earlier tryimport. If they all match the uberblock we just found, + * then the pool has not changed and we return false so we do not test + * a second time. + */ +if (tryconfig_txg && tryconfig_txg == ub->ub_txg && + tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp && + tryconfig_mmp_seq && tryconfig_mmp_seq == + (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) + return (B_FALSE); - /* - * Allow the activity check to be skipped when importing the pool - * on the same host which last imported it. Since the hostid from - * configuration may be stale use the one read from the label. - */ - if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) - hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); +/* + * Allow the activity check to be skipped when importing the pool + * on the same host which last imported it. Since the hostid from + * configuration may be stale use the one read from the label. + */ +if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) + hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); - if (hostid == spa_get_hostid(spa)) - return (B_FALSE); +if (hostid == spa_get_hostid(spa)) + return (B_FALSE); - /* - * Skip the activity test when the pool was cleanly exported. - */ - if (state != POOL_STATE_ACTIVE) - return (B_FALSE); +/* + * Skip the activity test when the pool was cleanly exported. 
+ */ +if (state != POOL_STATE_ACTIVE) + return (B_FALSE); - return (B_TRUE); +return (B_TRUE); } /* - * Nanoseconds the activity check must watch for changes on-disk. - */ +* Nanoseconds the activity check must watch for changes on-disk. +*/ static uint64_t spa_activity_check_duration(spa_t *spa, uberblock_t *ub) { - uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1); - uint64_t multihost_interval = MSEC2NSEC( - MMP_INTERVAL_OK(zfs_multihost_interval)); - uint64_t import_delay = MAX(NANOSEC, import_intervals * - multihost_interval); +uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1); +uint64_t multihost_interval = MSEC2NSEC( + MMP_INTERVAL_OK(zfs_multihost_interval)); +uint64_t import_delay = MAX(NANOSEC, import_intervals * + multihost_interval); - /* - * Local tunables determine a minimum duration except for the case - * where we know when the remote host will suspend the pool if MMP - * writes do not land. - * - * See Big Theory comment at the top of mmp.c for the reasoning behind - * these cases and times. - */ +/* + * Local tunables determine a minimum duration except for the case + * where we know when the remote host will suspend the pool if MMP + * writes do not land. + * + * See Big Theory comment at the top of mmp.c for the reasoning behind + * these cases and times. 
+ */ - ASSERT(MMP_IMPORT_SAFETY_FACTOR >= 100); +ASSERT(MMP_IMPORT_SAFETY_FACTOR >= 100); - if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && - MMP_FAIL_INT(ub) > 0) { +if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && + MMP_FAIL_INT(ub) > 0) { - /* MMP on remote host will suspend pool after failed writes */ - import_delay = MMP_FAIL_INT(ub) * MSEC2NSEC(MMP_INTERVAL(ub)) * - MMP_IMPORT_SAFETY_FACTOR / 100; + /* MMP on remote host will suspend pool after failed writes */ + import_delay = MMP_FAIL_INT(ub) * MSEC2NSEC(MMP_INTERVAL(ub)) * + MMP_IMPORT_SAFETY_FACTOR / 100; - zfs_dbgmsg("fail_intvals>0 import_delay=%llu ub_mmp " - "mmp_fails=%llu ub_mmp mmp_interval=%llu " - "import_intervals=%u", import_delay, MMP_FAIL_INT(ub), - MMP_INTERVAL(ub), import_intervals); + zfs_dbgmsg("fail_intvals>0 import_delay=%llu ub_mmp " + "mmp_fails=%llu ub_mmp mmp_interval=%llu " + "import_intervals=%u", import_delay, MMP_FAIL_INT(ub), + MMP_INTERVAL(ub), import_intervals); - } else if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && - MMP_FAIL_INT(ub) == 0) { +} else if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && + MMP_FAIL_INT(ub) == 0) { - /* MMP on remote host will never suspend pool */ - import_delay = MAX(import_delay, (MSEC2NSEC(MMP_INTERVAL(ub)) + - ub->ub_mmp_delay) * import_intervals); + /* MMP on remote host will never suspend pool */ + import_delay = MAX(import_delay, (MSEC2NSEC(MMP_INTERVAL(ub)) + + ub->ub_mmp_delay) * import_intervals); - zfs_dbgmsg("fail_intvals=0 import_delay=%llu ub_mmp " - "mmp_interval=%llu ub_mmp_delay=%llu " - "import_intervals=%u", import_delay, MMP_INTERVAL(ub), - ub->ub_mmp_delay, import_intervals); + zfs_dbgmsg("fail_intvals=0 import_delay=%llu ub_mmp " + "mmp_interval=%llu ub_mmp_delay=%llu " + "import_intervals=%u", import_delay, MMP_INTERVAL(ub), + ub->ub_mmp_delay, import_intervals); - } else if (MMP_VALID(ub)) { - /* - * zfs-0.7 compatibility case - */ +} else if (MMP_VALID(ub)) { + /* + * zfs-0.7 compatibility 
case + */ - import_delay = MAX(import_delay, (multihost_interval + - ub->ub_mmp_delay) * import_intervals); + import_delay = MAX(import_delay, (multihost_interval + + ub->ub_mmp_delay) * import_intervals); - zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu " - "import_intervals=%u leaves=%u", import_delay, - ub->ub_mmp_delay, import_intervals, - vdev_count_leaves(spa)); - } else { - /* Using local tunings is the only reasonable option */ - zfs_dbgmsg("pool last imported on non-MMP aware " - "host using import_delay=%llu multihost_interval=%llu " - "import_intervals=%u", import_delay, multihost_interval, - import_intervals); - } + zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu " + "import_intervals=%u leaves=%u", import_delay, + ub->ub_mmp_delay, import_intervals, + vdev_count_leaves(spa)); +} else { + /* Using local tunings is the only reasonable option */ + zfs_dbgmsg("pool last imported on non-MMP aware " + "host using import_delay=%llu multihost_interval=%llu " + "import_intervals=%u", import_delay, multihost_interval, + import_intervals); +} - return (import_delay); +return (import_delay); } /* - * Perform the import activity check. If the user canceled the import or - * we detected activity then fail. - */ +* Perform the import activity check. If the user canceled the import or +* we detected activity then fail. +*/ static int spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) { - uint64_t txg = ub->ub_txg; - uint64_t timestamp = ub->ub_timestamp; - uint64_t mmp_config = ub->ub_mmp_config; - uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0; - uint64_t import_delay; - hrtime_t import_expire; - nvlist_t *mmp_label = NULL; - vdev_t *rvd = spa->spa_root_vdev; - kcondvar_t cv; - kmutex_t mtx; - int error = 0; +uint64_t txg = ub->ub_txg; +uint64_t timestamp = ub->ub_timestamp; +uint64_t mmp_config = ub->ub_mmp_config; +uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? 
MMP_SEQ(ub) : 0; +uint64_t import_delay; +hrtime_t import_expire; +nvlist_t *mmp_label = NULL; +vdev_t *rvd = spa->spa_root_vdev; +kcondvar_t cv; +kmutex_t mtx; +int error = 0; + +cv_init(&cv, NULL, CV_DEFAULT, NULL); +mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL); +mutex_enter(&mtx); - cv_init(&cv, NULL, CV_DEFAULT, NULL); - mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL); - mutex_enter(&mtx); +/* + * If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed + * during the earlier tryimport. If the txg recorded there is 0 then + * the pool is known to be active on another host. + * + * Otherwise, the pool might be in use on another host. Check for + * changes in the uberblocks on disk if necessary. + */ +if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { + nvlist_t *nvinfo = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_LOAD_INFO); - /* - * If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed - * during the earlier tryimport. If the txg recorded there is 0 then - * the pool is known to be active on another host. - * - * Otherwise, the pool might be in use on another host. Check for - * changes in the uberblocks on disk if necessary. 
- */ - if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { - nvlist_t *nvinfo = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_LOAD_INFO); - - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) && - fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) { - vdev_uberblock_load(rvd, ub, &mmp_label); - error = SET_ERROR(EREMOTEIO); - goto out; - } + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) && + fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) { + vdev_uberblock_load(rvd, ub, &mmp_label); + error = SET_ERROR(EREMOTEIO); + goto out; } +} - import_delay = spa_activity_check_duration(spa, ub); +import_delay = spa_activity_check_duration(spa, ub); - /* Add a small random factor in case of simultaneous imports (0-25%) */ - import_delay += import_delay * spa_get_random(250) / 1000; +/* Add a small random factor in case of simultaneous imports (0-25%) */ +import_delay += import_delay * spa_get_random(250) / 1000; - import_expire = gethrtime() + import_delay; +import_expire = gethrtime() + import_delay; - while (gethrtime() < import_expire) { - (void) spa_import_progress_set_mmp_check(spa_guid(spa), - NSEC2SEC(import_expire - gethrtime())); +while (gethrtime() < import_expire) { + (void) spa_import_progress_set_mmp_check(spa_guid(spa), + NSEC2SEC(import_expire - gethrtime())); - vdev_uberblock_load(rvd, ub, &mmp_label); + vdev_uberblock_load(rvd, ub, &mmp_label); - if (txg != ub->ub_txg || timestamp != ub->ub_timestamp || - mmp_seq != (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) { - zfs_dbgmsg("multihost activity detected " - "txg %llu ub_txg %llu " - "timestamp %llu ub_timestamp %llu " - "mmp_config %#llx ub_mmp_config %#llx", - txg, ub->ub_txg, timestamp, ub->ub_timestamp, - mmp_config, ub->ub_mmp_config); + if (txg != ub->ub_txg || timestamp != ub->ub_timestamp || + mmp_seq != (MMP_SEQ_VALID(ub) ? 
MMP_SEQ(ub) : 0)) { + zfs_dbgmsg("multihost activity detected " + "txg %llu ub_txg %llu " + "timestamp %llu ub_timestamp %llu " + "mmp_config %#llx ub_mmp_config %#llx", + txg, ub->ub_txg, timestamp, ub->ub_timestamp, + mmp_config, ub->ub_mmp_config); - error = SET_ERROR(EREMOTEIO); - break; - } + error = SET_ERROR(EREMOTEIO); + break; + } - if (mmp_label) { - nvlist_free(mmp_label); - mmp_label = NULL; - } + if (mmp_label) { + nvlist_free(mmp_label); + mmp_label = NULL; + } - error = cv_timedwait_sig(&cv, &mtx, ddi_get_lbolt() + hz); - if (error != -1) { - error = SET_ERROR(EINTR); - break; - } - error = 0; + error = cv_timedwait_sig(&cv, &mtx, ddi_get_lbolt() + hz); + if (error != -1) { + error = SET_ERROR(EINTR); + break; } + error = 0; +} out: - mutex_exit(&mtx); - mutex_destroy(&mtx); - cv_destroy(&cv); +mutex_exit(&mtx); +mutex_destroy(&mtx); +cv_destroy(&cv); - /* - * If the pool is determined to be active store the status in the - * spa->spa_load_info nvlist. If the remote hostname or hostid are - * available from configuration read from disk store them as well. - * This allows 'zpool import' to generate a more useful message. - * - * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory) - * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool - * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool - */ - if (error == EREMOTEIO) { - char *hostname = ""; - uint64_t hostid = 0; - - if (mmp_label) { - if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) { - hostname = fnvlist_lookup_string(mmp_label, - ZPOOL_CONFIG_HOSTNAME); - fnvlist_add_string(spa->spa_load_info, - ZPOOL_CONFIG_MMP_HOSTNAME, hostname); - } +/* + * If the pool is determined to be active store the status in the + * spa->spa_load_info nvlist. If the remote hostname or hostid are + * available from configuration read from disk store them as well. + * This allows 'zpool import' to generate a more useful message. 
+ * + * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory) + * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool + * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool + */ +if (error == EREMOTEIO) { + char *hostname = ""; + uint64_t hostid = 0; - if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) { - hostid = fnvlist_lookup_uint64(mmp_label, - ZPOOL_CONFIG_HOSTID); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_HOSTID, hostid); - } + if (mmp_label) { + if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) { + hostname = fnvlist_lookup_string(mmp_label, + ZPOOL_CONFIG_HOSTNAME); + fnvlist_add_string(spa->spa_load_info, + ZPOOL_CONFIG_MMP_HOSTNAME, hostname); } - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_TXG, 0); - - error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO); + if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) { + hostid = fnvlist_lookup_uint64(mmp_label, + ZPOOL_CONFIG_HOSTID); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_HOSTID, hostid); + } } - if (mmp_label) - nvlist_free(mmp_label); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_TXG, 0); - return (error); + error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO); +} + +if (mmp_label) + nvlist_free(mmp_label); + +return (error); } static int spa_verify_host(spa_t *spa, nvlist_t *mos_config) { - uint64_t hostid; - char *hostname; - uint64_t myhostid = 0; - - if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config, - ZPOOL_CONFIG_HOSTID, &hostid) == 0) { - hostname = fnvlist_lookup_string(mos_config, - ZPOOL_CONFIG_HOSTNAME); - - myhostid = zone_get_hostid(NULL); - - if (hostid != 0 && myhostid != 0 && hostid != myhostid) { - cmn_err(CE_WARN, "pool '%s' could not be " - "loaded as it was last accessed by " - "another system (host: %s hostid: 0x%llx). 
" - "See: http://illumos.org/msg/ZFS-8000-EY", - spa_name(spa), hostname, (u_longlong_t)hostid); - spa_load_failed(spa, "hostid verification failed: pool " - "last accessed by host: %s (hostid: 0x%llx)", - hostname, (u_longlong_t)hostid); - return (SET_ERROR(EBADF)); - } +uint64_t hostid; +char *hostname; +uint64_t myhostid = 0; + +if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config, + ZPOOL_CONFIG_HOSTID, &hostid) == 0) { + hostname = fnvlist_lookup_string(mos_config, + ZPOOL_CONFIG_HOSTNAME); + + myhostid = zone_get_hostid(NULL); + + if (hostid != 0 && myhostid != 0 && hostid != myhostid) { + cmn_err(CE_WARN, "pool '%s' could not be " + "loaded as it was last accessed by " + "another system (host: %s hostid: 0x%llx). " + "See: http://illumos.org/msg/ZFS-8000-EY", + spa_name(spa), hostname, (u_longlong_t)hostid); + spa_load_failed(spa, "hostid verification failed: pool " + "last accessed by host: %s (hostid: 0x%llx)", + hostname, (u_longlong_t)hostid); + return (SET_ERROR(EBADF)); } +} - return (0); +return (0); } static int spa_ld_parse_config(spa_t *spa, spa_import_type_t type) { - int error = 0; - nvlist_t *nvtree, *nvl, *config = spa->spa_config; - int parse; - vdev_t *rvd; - uint64_t pool_guid; - char *comment; +int error = 0; +nvlist_t *nvtree, *nvl, *config = spa->spa_config; +int parse; +vdev_t *rvd; +uint64_t pool_guid; +char *comment; - /* - * Versioning wasn't explicitly added to the label until later, so if - * it's not present treat it as the initial version. - */ - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &spa->spa_ubsync.ub_version) != 0) - spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; +/* + * Versioning wasn't explicitly added to the label until later, so if + * it's not present treat it as the initial version. 
+ */ +if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &spa->spa_ubsync.ub_version) != 0) + spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { - spa_load_failed(spa, "invalid config provided: '%s' missing", - ZPOOL_CONFIG_POOL_GUID); - return (SET_ERROR(EINVAL)); - } +if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { + spa_load_failed(spa, "invalid config provided: '%s' missing", + ZPOOL_CONFIG_POOL_GUID); + return (SET_ERROR(EINVAL)); +} - /* - * If we are doing an import, ensure that the pool is not already - * imported by checking if its pool guid already exists in the - * spa namespace. - * - * The only case that we allow an already imported pool to be - * imported again, is when the pool is checkpointed and we want to - * look at its checkpointed state from userland tools like zdb. - */ +/* + * If we are doing an import, ensure that the pool is not already + * imported by checking if its pool guid already exists in the + * spa namespace. + * + * The only case that we allow an already imported pool to be + * imported again, is when the pool is checkpointed and we want to + * look at its checkpointed state from userland tools like zdb. 
+ */ #ifdef _KERNEL - if ((spa->spa_load_state == SPA_LOAD_IMPORT || - spa->spa_load_state == SPA_LOAD_TRYIMPORT) && - spa_guid_exists(pool_guid, 0)) { +if ((spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0)) { #else - if ((spa->spa_load_state == SPA_LOAD_IMPORT || - spa->spa_load_state == SPA_LOAD_TRYIMPORT) && - spa_guid_exists(pool_guid, 0) && - !spa_importing_readonly_checkpoint(spa)) { +if ((spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0) && + !spa_importing_readonly_checkpoint(spa)) { #endif - spa_load_failed(spa, "a pool with guid %llu is already open", - (u_longlong_t)pool_guid); - return (SET_ERROR(EEXIST)); - } - - spa->spa_config_guid = pool_guid; + spa_load_failed(spa, "a pool with guid %llu is already open", + (u_longlong_t)pool_guid); + return (SET_ERROR(EEXIST)); +} - nvlist_free(spa->spa_load_info); - spa->spa_load_info = fnvlist_alloc(); +spa->spa_config_guid = pool_guid; - ASSERT(spa->spa_comment == NULL); - if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) - spa->spa_comment = spa_strdup(comment); +nvlist_free(spa->spa_load_info); +spa->spa_load_info = fnvlist_alloc(); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, - &spa->spa_config_txg); +ASSERT(spa->spa_comment == NULL); +if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + spa->spa_comment = spa_strdup(comment); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) - spa->spa_config_splitting = fnvlist_dup(nvl); +(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, + &spa->spa_config_txg); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtree)) { - spa_load_failed(spa, "invalid config provided: '%s' missing", - ZPOOL_CONFIG_VDEV_TREE); - return (SET_ERROR(EINVAL)); - } +if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) + spa->spa_config_splitting = 
fnvlist_dup(nvl); - /* - * Create "The Godfather" zio to hold all async IOs - */ - spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), - KM_SLEEP); - for (int i = 0; i < max_ncpus; i++) { - spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); - } +if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtree)) { + spa_load_failed(spa, "invalid config provided: '%s' missing", + ZPOOL_CONFIG_VDEV_TREE); + return (SET_ERROR(EINVAL)); +} - /* - * Parse the configuration into a vdev tree. We explicitly set the - * value that will be returned by spa_version() since parsing the - * configuration requires knowing the version number. - */ - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - parse = (type == SPA_IMPORT_EXISTING ? - VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); - error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); - spa_config_exit(spa, SCL_ALL, FTAG); +/* + * Create "The Godfather" zio to hold all async IOs + */ +spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); +for (int i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); +} - if (error != 0) { - spa_load_failed(spa, "unable to parse config [error=%d]", - error); - return (error); - } +/* + * Parse the configuration into a vdev tree. We explicitly set the + * value that will be returned by spa_version() since parsing the + * configuration requires knowing the version number. + */ +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +parse = (type == SPA_IMPORT_EXISTING ? 
+ VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); +error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); +spa_config_exit(spa, SCL_ALL, FTAG); + +if (error != 0) { + spa_load_failed(spa, "unable to parse config [error=%d]", + error); + return (error); +} - ASSERT(spa->spa_root_vdev == rvd); - ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); - ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); +ASSERT(spa->spa_root_vdev == rvd); +ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); +ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); - if (type != SPA_IMPORT_ASSEMBLE) { - ASSERT(spa_guid(spa) == pool_guid); - } +if (type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_guid(spa) == pool_guid); +} - return (0); +return (0); } /* - * Recursively open all vdevs in the vdev tree. This function is called twice: - * first with the untrusted config, then with the trusted config. - */ +* Recursively open all vdevs in the vdev tree. This function is called twice: +* first with the untrusted config, then with the trusted config. +*/ static int spa_ld_open_vdevs(spa_t *spa) { - int error = 0; +int error = 0; - /* - * spa_missing_tvds_allowed defines how many top-level vdevs can be - * missing/unopenable for the root vdev to be still considered openable. - */ - if (spa->spa_trust_config) { - spa->spa_missing_tvds_allowed = zfs_max_missing_tvds; - } else if (spa->spa_config_source == SPA_CONFIG_SRC_CACHEFILE) { - spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_cachefile; - } else if (spa->spa_config_source == SPA_CONFIG_SRC_SCAN) { - spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_scan; +/* + * spa_missing_tvds_allowed defines how many top-level vdevs can be + * missing/unopenable for the root vdev to be still considered openable. 
+ */ +if (spa->spa_trust_config) { + spa->spa_missing_tvds_allowed = zfs_max_missing_tvds; +} else if (spa->spa_config_source == SPA_CONFIG_SRC_CACHEFILE) { + spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_cachefile; +} else if (spa->spa_config_source == SPA_CONFIG_SRC_SCAN) { + spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_scan; +} else { + spa->spa_missing_tvds_allowed = 0; +} + +spa->spa_missing_tvds_allowed = + MAX(zfs_max_missing_tvds, spa->spa_missing_tvds_allowed); + +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +error = vdev_open(spa->spa_root_vdev); +spa_config_exit(spa, SCL_ALL, FTAG); + +if (spa->spa_missing_tvds != 0) { + spa_load_note(spa, "vdev tree has %lld missing top-level " + "vdevs.", (u_longlong_t)spa->spa_missing_tvds); + if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) { + /* + * Although theoretically we could allow users to open + * incomplete pools in RW mode, we'd need to add a lot + * of extra logic (e.g. adjust pool space to account + * for missing vdevs). + * This limitation also prevents users from accidentally + * opening the pool in RW mode during data recovery and + * damaging it further. + */ + spa_load_note(spa, "pools with missing top-level " + "vdevs can only be opened in read-only mode."); + error = SET_ERROR(ENXIO); } else { - spa->spa_missing_tvds_allowed = 0; - } - - spa->spa_missing_tvds_allowed = - MAX(zfs_max_missing_tvds, spa->spa_missing_tvds_allowed); - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = vdev_open(spa->spa_root_vdev); - spa_config_exit(spa, SCL_ALL, FTAG); - - if (spa->spa_missing_tvds != 0) { - spa_load_note(spa, "vdev tree has %lld missing top-level " - "vdevs.", (u_longlong_t)spa->spa_missing_tvds); - if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) { - /* - * Although theoretically we could allow users to open - * incomplete pools in RW mode, we'd need to add a lot - * of extra logic (e.g. adjust pool space to account - * for missing vdevs). 
- * This limitation also prevents users from accidentally - * opening the pool in RW mode during data recovery and - * damaging it further. - */ - spa_load_note(spa, "pools with missing top-level " - "vdevs can only be opened in read-only mode."); - error = SET_ERROR(ENXIO); - } else { - spa_load_note(spa, "current settings allow for maximum " - "%lld missing top-level vdevs at this stage.", - (u_longlong_t)spa->spa_missing_tvds_allowed); - } - } - if (error != 0) { - spa_load_failed(spa, "unable to open vdev tree [error=%d]", - error); + spa_load_note(spa, "current settings allow for maximum " + "%lld missing top-level vdevs at this stage.", + (u_longlong_t)spa->spa_missing_tvds_allowed); } - if (spa->spa_missing_tvds != 0 || error != 0) - vdev_dbgmsg_print_tree(spa->spa_root_vdev, 2); +} +if (error != 0) { + spa_load_failed(spa, "unable to open vdev tree [error=%d]", + error); +} +if (spa->spa_missing_tvds != 0 || error != 0) + vdev_dbgmsg_print_tree(spa->spa_root_vdev, 2); - return (error); +return (error); } /* - * We need to validate the vdev labels against the configuration that - * we have in hand. This function is called twice: first with an untrusted - * config, then with a trusted config. The validation is more strict when the - * config is trusted. - */ +* We need to validate the vdev labels against the configuration that +* we have in hand. This function is called twice: first with an untrusted +* config, then with a trusted config. The validation is more strict when the +* config is trusted. 
+*/ static int spa_ld_validate_vdevs(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = vdev_validate(rvd); - spa_config_exit(spa, SCL_ALL, FTAG); +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +error = vdev_validate(rvd); +spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0) { - spa_load_failed(spa, "vdev_validate failed [error=%d]", error); - return (error); - } +if (error != 0) { + spa_load_failed(spa, "vdev_validate failed [error=%d]", error); + return (error); +} - if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { - spa_load_failed(spa, "cannot open vdev tree after invalidating " - "some vdevs"); - vdev_dbgmsg_print_tree(rvd, 2); - return (SET_ERROR(ENXIO)); - } +if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { + spa_load_failed(spa, "cannot open vdev tree after invalidating " + "some vdevs"); + vdev_dbgmsg_print_tree(rvd, 2); + return (SET_ERROR(ENXIO)); +} - return (0); +return (0); } static void spa_ld_select_uberblock_done(spa_t *spa, uberblock_t *ub) { - spa->spa_state = POOL_STATE_ACTIVE; - spa->spa_ubsync = spa->spa_uberblock; - spa->spa_verify_min_txg = spa->spa_extreme_rewind ? - TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; - spa->spa_first_txg = spa->spa_last_ubsync_txg ? - spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; - spa->spa_claim_max_txg = spa->spa_first_txg; - spa->spa_prev_software_version = ub->ub_software_version; +spa->spa_state = POOL_STATE_ACTIVE; +spa->spa_ubsync = spa->spa_uberblock; +spa->spa_verify_min_txg = spa->spa_extreme_rewind ? + TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; +spa->spa_first_txg = spa->spa_last_ubsync_txg ? 
+ spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; +spa->spa_claim_max_txg = spa->spa_first_txg; +spa->spa_prev_software_version = ub->ub_software_version; } static int spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) { - vdev_t *rvd = spa->spa_root_vdev; - nvlist_t *label; - uberblock_t *ub = &spa->spa_uberblock; - boolean_t activity_check = B_FALSE; +vdev_t *rvd = spa->spa_root_vdev; +nvlist_t *label; +uberblock_t *ub = &spa->spa_uberblock; +boolean_t activity_check = B_FALSE; - /* - * If we are opening the checkpointed state of the pool by - * rewinding to it, at this point we will have written the - * checkpointed uberblock to the vdev labels, so searching - * the labels will find the right uberblock. However, if - * we are opening the checkpointed state read-only, we have - * not modified the labels. Therefore, we must ignore the - * labels and continue using the spa_uberblock that was set - * by spa_ld_checkpoint_rewind. - * - * Note that it would be fine to ignore the labels when - * rewinding (opening writeable) as well. However, if we - * crash just after writing the labels, we will end up - * searching the labels. Doing so in the common case means - * that this code path gets exercised normally, rather than - * just in the edge case. - */ - if (ub->ub_checkpoint_txg != 0 && - spa_importing_readonly_checkpoint(spa)) { - spa_ld_select_uberblock_done(spa, ub); - return (0); - } +/* + * If we are opening the checkpointed state of the pool by + * rewinding to it, at this point we will have written the + * checkpointed uberblock to the vdev labels, so searching + * the labels will find the right uberblock. However, if + * we are opening the checkpointed state read-only, we have + * not modified the labels. Therefore, we must ignore the + * labels and continue using the spa_uberblock that was set + * by spa_ld_checkpoint_rewind. + * + * Note that it would be fine to ignore the labels when + * rewinding (opening writeable) as well. 
However, if we + * crash just after writing the labels, we will end up + * searching the labels. Doing so in the common case means + * that this code path gets exercised normally, rather than + * just in the edge case. + */ +if (ub->ub_checkpoint_txg != 0 && + spa_importing_readonly_checkpoint(spa)) { + spa_ld_select_uberblock_done(spa, ub); + return (0); +} - /* - * Find the best uberblock. - */ - vdev_uberblock_load(rvd, ub, &label); +/* + * Find the best uberblock. + */ +vdev_uberblock_load(rvd, ub, &label); - /* - * If we weren't able to find a single valid uberblock, return failure. - */ - if (ub->ub_txg == 0) { +/* + * If we weren't able to find a single valid uberblock, return failure. + */ +if (ub->ub_txg == 0) { + nvlist_free(label); + spa_load_failed(spa, "no valid uberblock found"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); +} + +if (spa->spa_load_max_txg != UINT64_MAX) { + (void) spa_import_progress_set_max_txg(spa_guid(spa), + (u_longlong_t)spa->spa_load_max_txg); +} +spa_load_note(spa, "using uberblock with txg=%llu", + (u_longlong_t)ub->ub_txg); + + +/* + * For pools which have the multihost property on determine if the + * pool is truly inactive and can be safely imported. Prevent + * hosts which don't have a hostid set from importing the pool. 
+ */ +activity_check = spa_activity_check_required(spa, ub, label, + spa->spa_config); +if (activity_check) { + if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && + spa_get_hostid(spa) == 0) { nvlist_free(label); - spa_load_failed(spa, "no valid uberblock found"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); + return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); } - if (spa->spa_load_max_txg != UINT64_MAX) { - (void) spa_import_progress_set_max_txg(spa_guid(spa), - (u_longlong_t)spa->spa_load_max_txg); + int error = spa_activity_check(spa, ub, spa->spa_config); + if (error) { + nvlist_free(label); + return (error); } - spa_load_note(spa, "using uberblock with txg=%llu", - (u_longlong_t)ub->ub_txg); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_TXG, ub->ub_txg); + fnvlist_add_uint16(spa->spa_load_info, + ZPOOL_CONFIG_MMP_SEQ, + (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)); +} + +/* + * If the pool has an unsupported version we can't open it. + */ +if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { + nvlist_free(label); + spa_load_failed(spa, "version %llu is not supported", + (u_longlong_t)ub->ub_version); + return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); +} + +if (ub->ub_version >= SPA_VERSION_FEATURES) { + nvlist_t *features; /* - * For pools which have the multihost property on determine if the - * pool is truly inactive and can be safely imported. Prevent - * hosts which don't have a hostid set from importing the pool. + * If we weren't able to find what's necessary for reading the + * MOS in the label, return failure. 
*/ - activity_check = spa_activity_check_required(spa, ub, label, - spa->spa_config); - if (activity_check) { - if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && - spa_get_hostid(spa) == 0) { - nvlist_free(label); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); - return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); - } - - int error = spa_activity_check(spa, ub, spa->spa_config); - if (error) { - nvlist_free(label); - return (error); - } - - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_TXG, ub->ub_txg); - fnvlist_add_uint16(spa->spa_load_info, - ZPOOL_CONFIG_MMP_SEQ, - (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)); + if (label == NULL) { + spa_load_failed(spa, "label config unavailable"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + ENXIO)); } - /* - * If the pool has an unsupported version we can't open it. - */ - if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { + if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, + &features) != 0) { nvlist_free(label); - spa_load_failed(spa, "version %llu is not supported", - (u_longlong_t)ub->ub_version); - return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); - } - - if (ub->ub_version >= SPA_VERSION_FEATURES) { - nvlist_t *features; - - /* - * If we weren't able to find what's necessary for reading the - * MOS in the label, return failure. - */ - if (label == NULL) { - spa_load_failed(spa, "label config unavailable"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - ENXIO)); - } - - if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, - &features) != 0) { - nvlist_free(label); - spa_load_failed(spa, "invalid label: '%s' missing", - ZPOOL_CONFIG_FEATURES_FOR_READ); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - ENXIO)); - } - - /* - * Update our in-core representation with the definitive values - * from the label. 
- */ - nvlist_free(spa->spa_label_features); - VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); + spa_load_failed(spa, "invalid label: '%s' missing", + ZPOOL_CONFIG_FEATURES_FOR_READ); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + ENXIO)); } - nvlist_free(label); - /* - * Look through entries in the label nvlist's features_for_read. If - * there is a feature listed there which we don't understand then we - * cannot open a pool. + * Update our in-core representation with the definitive values + * from the label. */ - if (ub->ub_version >= SPA_VERSION_FEATURES) { - nvlist_t *unsup_feat; - - VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == - 0); - - for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, - NULL); nvp != NULL; - nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { - if (!zfeature_is_supported(nvpair_name(nvp))) { - VERIFY(nvlist_add_string(unsup_feat, - nvpair_name(nvp), "") == 0); - } - } + nvlist_free(spa->spa_label_features); + VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); +} + +nvlist_free(label); - if (!nvlist_empty(unsup_feat)) { - VERIFY(nvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); - nvlist_free(unsup_feat); - spa_load_failed(spa, "some features are unsupported"); - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); +/* + * Look through entries in the label nvlist's features_for_read. If + * there is a feature listed there which we don't understand then we + * cannot open a pool. 
+ */ +if (ub->ub_version >= SPA_VERSION_FEATURES) { + nvlist_t *unsup_feat; + + VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == + 0); + + for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, + NULL); nvp != NULL; + nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { + if (!zfeature_is_supported(nvpair_name(nvp))) { + VERIFY(nvlist_add_string(unsup_feat, + nvpair_name(nvp), "") == 0); } + } + if (!nvlist_empty(unsup_feat)) { + VERIFY(nvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); nvlist_free(unsup_feat); + spa_load_failed(spa, "some features are unsupported"); + return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, + ENOTSUP)); } - if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_try_repair(spa, spa->spa_config); - spa_config_exit(spa, SCL_ALL, FTAG); - nvlist_free(spa->spa_config_splitting); - spa->spa_config_splitting = NULL; - } + nvlist_free(unsup_feat); +} - /* - * Initialize internal SPA structures. - */ - spa_ld_select_uberblock_done(spa, ub); +if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_try_repair(spa, spa->spa_config); + spa_config_exit(spa, SCL_ALL, FTAG); + nvlist_free(spa->spa_config_splitting); + spa->spa_config_splitting = NULL; +} - return (0); +/* + * Initialize internal SPA structures. 
+ */ +spa_ld_select_uberblock_done(spa, ub); + +return (0); } static int spa_ld_open_rootbp(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); - if (error != 0) { - spa_load_failed(spa, "unable to open rootbp in dsl_pool_init " - "[error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; +error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); +if (error != 0) { + spa_load_failed(spa, "unable to open rootbp in dsl_pool_init " + "[error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} +spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; - return (0); +return (0); } static int spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, - boolean_t reloading) +boolean_t reloading) { - vdev_t *mrvd, *rvd = spa->spa_root_vdev; - nvlist_t *nv, *mos_config, *policy; - int error = 0, copy_error; - uint64_t healthy_tvds, healthy_tvds_mos; - uint64_t mos_config_txg; +vdev_t *mrvd, *rvd = spa->spa_root_vdev; +nvlist_t *nv, *mos_config, *policy; +int error = 0, copy_error; +uint64_t healthy_tvds, healthy_tvds_mos; +uint64_t mos_config_txg; - if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object, B_TRUE) - != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object, B_TRUE) + != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - /* - * If we're assembling a pool from a split, the config provided is - * already trusted so there is nothing to do. - */ - if (type == SPA_IMPORT_ASSEMBLE) - return (0); +/* + * If we're assembling a pool from a split, the config provided is + * already trusted so there is nothing to do. 
+ */ +if (type == SPA_IMPORT_ASSEMBLE) + return (0); - healthy_tvds = spa_healthy_core_tvds(spa); +healthy_tvds = spa_healthy_core_tvds(spa); - if (load_nvlist(spa, spa->spa_config_object, &mos_config) - != 0) { - spa_load_failed(spa, "unable to retrieve MOS config"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +if (load_nvlist(spa, spa->spa_config_object, &mos_config) + != 0) { + spa_load_failed(spa, "unable to retrieve MOS config"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - /* - * If we are doing an open, pool owner wasn't verified yet, thus do - * the verification here. - */ - if (spa->spa_load_state == SPA_LOAD_OPEN) { - error = spa_verify_host(spa, mos_config); - if (error != 0) { - nvlist_free(mos_config); - return (error); - } +/* + * If we are doing an open, pool owner wasn't verified yet, thus do + * the verification here. + */ +if (spa->spa_load_state == SPA_LOAD_OPEN) { + error = spa_verify_host(spa, mos_config); + if (error != 0) { + nvlist_free(mos_config); + return (error); } +} - nv = fnvlist_lookup_nvlist(mos_config, ZPOOL_CONFIG_VDEV_TREE); +nv = fnvlist_lookup_nvlist(mos_config, ZPOOL_CONFIG_VDEV_TREE); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - /* - * Build a new vdev tree from the trusted config - */ - VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); +/* + * Build a new vdev tree from the trusted config + */ +VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); - /* - * Vdev paths in the MOS may be obsolete. If the untrusted config was - * obtained by scanning /dev/dsk, then it will have the right vdev - * paths. We update the trusted MOS config with this information. - * We first try to copy the paths with vdev_copy_path_strict, which - * succeeds only when both configs have exactly the same vdev tree. - * If that fails, we fall back to a more flexible method that has a - * best effort policy. 
- */ - copy_error = vdev_copy_path_strict(rvd, mrvd); - if (copy_error != 0 || spa_load_print_vdev_tree) { - spa_load_note(spa, "provided vdev tree:"); - vdev_dbgmsg_print_tree(rvd, 2); - spa_load_note(spa, "MOS vdev tree:"); - vdev_dbgmsg_print_tree(mrvd, 2); - } - if (copy_error != 0) { - spa_load_note(spa, "vdev_copy_path_strict failed, falling " - "back to vdev_copy_path_relaxed"); - vdev_copy_path_relaxed(rvd, mrvd); - } +/* + * Vdev paths in the MOS may be obsolete. If the untrusted config was + * obtained by scanning /dev/dsk, then it will have the right vdev + * paths. We update the trusted MOS config with this information. + * We first try to copy the paths with vdev_copy_path_strict, which + * succeeds only when both configs have exactly the same vdev tree. + * If that fails, we fall back to a more flexible method that has a + * best effort policy. + */ +copy_error = vdev_copy_path_strict(rvd, mrvd); +if (copy_error != 0 || spa_load_print_vdev_tree) { + spa_load_note(spa, "provided vdev tree:"); + vdev_dbgmsg_print_tree(rvd, 2); + spa_load_note(spa, "MOS vdev tree:"); + vdev_dbgmsg_print_tree(mrvd, 2); +} +if (copy_error != 0) { + spa_load_note(spa, "vdev_copy_path_strict failed, falling " + "back to vdev_copy_path_relaxed"); + vdev_copy_path_relaxed(rvd, mrvd); +} - vdev_close(rvd); - vdev_free(rvd); - spa->spa_root_vdev = mrvd; - rvd = mrvd; - spa_config_exit(spa, SCL_ALL, FTAG); +vdev_close(rvd); +vdev_free(rvd); +spa->spa_root_vdev = mrvd; +rvd = mrvd; +spa_config_exit(spa, SCL_ALL, FTAG); - /* - * We will use spa_config if we decide to reload the spa or if spa_load - * fails and we rewind. We must thus regenerate the config using the - * MOS information with the updated paths. ZPOOL_LOAD_POLICY is used to - * pass settings on how to load the pool and is not stored in the MOS. - * We copy it over to our new, trusted config. 
- */ - mos_config_txg = fnvlist_lookup_uint64(mos_config, - ZPOOL_CONFIG_POOL_TXG); - nvlist_free(mos_config); - mos_config = spa_config_generate(spa, NULL, mos_config_txg, B_FALSE); - if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_LOAD_POLICY, - &policy) == 0) - fnvlist_add_nvlist(mos_config, ZPOOL_LOAD_POLICY, policy); - spa_config_set(spa, mos_config); - spa->spa_config_source = SPA_CONFIG_SRC_MOS; +/* + * We will use spa_config if we decide to reload the spa or if spa_load + * fails and we rewind. We must thus regenerate the config using the + * MOS information with the updated paths. ZPOOL_LOAD_POLICY is used to + * pass settings on how to load the pool and is not stored in the MOS. + * We copy it over to our new, trusted config. + */ +mos_config_txg = fnvlist_lookup_uint64(mos_config, + ZPOOL_CONFIG_POOL_TXG); +nvlist_free(mos_config); +mos_config = spa_config_generate(spa, NULL, mos_config_txg, B_FALSE); +if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_LOAD_POLICY, + &policy) == 0) + fnvlist_add_nvlist(mos_config, ZPOOL_LOAD_POLICY, policy); +spa_config_set(spa, mos_config); +spa->spa_config_source = SPA_CONFIG_SRC_MOS; - /* - * Now that we got the config from the MOS, we should be more strict - * in checking blkptrs and can make assumptions about the consistency - * of the vdev tree. spa_trust_config must be set to true before opening - * vdevs in order for them to be writeable. - */ - spa->spa_trust_config = B_TRUE; +/* + * Now that we got the config from the MOS, we should be more strict + * in checking blkptrs and can make assumptions about the consistency + * of the vdev tree. spa_trust_config must be set to true before opening + * vdevs in order for them to be writeable. 
+ */ +spa->spa_trust_config = B_TRUE; - /* - * Open and validate the new vdev tree - */ - error = spa_ld_open_vdevs(spa); - if (error != 0) - return (error); +/* + * Open and validate the new vdev tree + */ +error = spa_ld_open_vdevs(spa); +if (error != 0) + return (error); - error = spa_ld_validate_vdevs(spa); - if (error != 0) - return (error); +error = spa_ld_validate_vdevs(spa); +if (error != 0) + return (error); - if (copy_error != 0 || spa_load_print_vdev_tree) { - spa_load_note(spa, "final vdev tree:"); - vdev_dbgmsg_print_tree(rvd, 2); - } +if (copy_error != 0 || spa_load_print_vdev_tree) { + spa_load_note(spa, "final vdev tree:"); + vdev_dbgmsg_print_tree(rvd, 2); +} - if (spa->spa_load_state != SPA_LOAD_TRYIMPORT && - !spa->spa_extreme_rewind && zfs_max_missing_tvds == 0) { - /* - * Sanity check to make sure that we are indeed loading the - * latest uberblock. If we missed SPA_SYNC_MIN_VDEVS tvds - * in the config provided and they happened to be the only ones - * to have the latest uberblock, we could involuntarily perform - * an extreme rewind. - */ - healthy_tvds_mos = spa_healthy_core_tvds(spa); - if (healthy_tvds_mos - healthy_tvds >= - SPA_SYNC_MIN_VDEVS) { - spa_load_note(spa, "config provided misses too many " - "top-level vdevs compared to MOS (%lld vs %lld). ", - (u_longlong_t)healthy_tvds, - (u_longlong_t)healthy_tvds_mos); - spa_load_note(spa, "vdev tree:"); - vdev_dbgmsg_print_tree(rvd, 2); - if (reloading) { - spa_load_failed(spa, "config was already " - "provided from MOS. Aborting."); - return (spa_vdev_err(rvd, - VDEV_AUX_CORRUPT_DATA, EIO)); - } - spa_load_note(spa, "spa must be reloaded using MOS " - "config"); - return (SET_ERROR(EAGAIN)); +if (spa->spa_load_state != SPA_LOAD_TRYIMPORT && + !spa->spa_extreme_rewind && zfs_max_missing_tvds == 0) { + /* + * Sanity check to make sure that we are indeed loading the + * latest uberblock. 
If we missed SPA_SYNC_MIN_VDEVS tvds + * in the config provided and they happened to be the only ones + * to have the latest uberblock, we could involuntarily perform + * an extreme rewind. + */ + healthy_tvds_mos = spa_healthy_core_tvds(spa); + if (healthy_tvds_mos - healthy_tvds >= + SPA_SYNC_MIN_VDEVS) { + spa_load_note(spa, "config provided misses too many " + "top-level vdevs compared to MOS (%lld vs %lld). ", + (u_longlong_t)healthy_tvds, + (u_longlong_t)healthy_tvds_mos); + spa_load_note(spa, "vdev tree:"); + vdev_dbgmsg_print_tree(rvd, 2); + if (reloading) { + spa_load_failed(spa, "config was already " + "provided from MOS. Aborting."); + return (spa_vdev_err(rvd, + VDEV_AUX_CORRUPT_DATA, EIO)); } + spa_load_note(spa, "spa must be reloaded using MOS " + "config"); + return (SET_ERROR(EAGAIN)); } +} - error = spa_check_for_missing_logs(spa); - if (error != 0) - return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); - - if (rvd->vdev_guid_sum != spa->spa_uberblock.ub_guid_sum) { - spa_load_failed(spa, "uberblock guid sum doesn't match MOS " - "guid sum (%llu != %llu)", - (u_longlong_t)spa->spa_uberblock.ub_guid_sum, - (u_longlong_t)rvd->vdev_guid_sum); - return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, - ENXIO)); - } +error = spa_check_for_missing_logs(spa); +if (error != 0) + return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); - return (0); +if (rvd->vdev_guid_sum != spa->spa_uberblock.ub_guid_sum) { + spa_load_failed(spa, "uberblock guid sum doesn't match MOS " + "guid sum (%llu != %llu)", + (u_longlong_t)spa->spa_uberblock.ub_guid_sum, + (u_longlong_t)rvd->vdev_guid_sum); + return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, + ENXIO)); +} + +return (0); } static int spa_ld_open_indirect_vdev_metadata(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * Everything that we read before spa_remove_init() must be stored - * on concreted vdevs. 
Therefore we do this as early as possible. - */ - error = spa_remove_init(spa); - if (error != 0) { - spa_load_failed(spa, "spa_remove_init failed [error=%d]", - error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +/* + * Everything that we read before spa_remove_init() must be stored + * on concreted vdevs. Therefore we do this as early as possible. + */ +error = spa_remove_init(spa); +if (error != 0) { + spa_load_failed(spa, "spa_remove_init failed [error=%d]", + error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - /* - * Retrieve information needed to condense indirect vdev mappings. - */ - error = spa_condense_init(spa); - if (error != 0) { - spa_load_failed(spa, "spa_condense_init failed [error=%d]", - error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); - } +/* + * Retrieve information needed to condense indirect vdev mappings. + */ +error = spa_condense_init(spa); +if (error != 0) { + spa_load_failed(spa, "spa_condense_init failed [error=%d]", + error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); +} - return (0); +return (0); } static int spa_ld_check_features(spa_t *spa, boolean_t *missing_feat_writep) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; - - if (spa_version(spa) >= SPA_VERSION_FEATURES) { - boolean_t missing_feat_read = B_FALSE; - nvlist_t *unsup_feat, *enabled_feat; - - if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, - &spa->spa_feat_for_read_obj, B_TRUE) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, - &spa->spa_feat_for_write_obj, B_TRUE) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +if (spa_version(spa) >= SPA_VERSION_FEATURES) { + boolean_t missing_feat_read = B_FALSE; + nvlist_t *unsup_feat, *enabled_feat; - if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, - &spa->spa_feat_desc_obj, B_TRUE) != 0) { - return 
(spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } + if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, + &spa->spa_feat_for_read_obj, B_TRUE) != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } - enabled_feat = fnvlist_alloc(); - unsup_feat = fnvlist_alloc(); + if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, + &spa->spa_feat_for_write_obj, B_TRUE) != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } - if (!spa_features_check(spa, B_FALSE, - unsup_feat, enabled_feat)) - missing_feat_read = B_TRUE; + if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, + &spa->spa_feat_desc_obj, B_TRUE) != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } - if (spa_writeable(spa) || - spa->spa_load_state == SPA_LOAD_TRYIMPORT) { - if (!spa_features_check(spa, B_TRUE, - unsup_feat, enabled_feat)) { - *missing_feat_writep = B_TRUE; - } - } + enabled_feat = fnvlist_alloc(); + unsup_feat = fnvlist_alloc(); - fnvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); + if (!spa_features_check(spa, B_FALSE, + unsup_feat, enabled_feat)) + missing_feat_read = B_TRUE; - if (!nvlist_empty(unsup_feat)) { - fnvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); + if (spa_writeable(spa) || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) { + if (!spa_features_check(spa, B_TRUE, + unsup_feat, enabled_feat)) { + *missing_feat_writep = B_TRUE; } + } - fnvlist_free(enabled_feat); - fnvlist_free(unsup_feat); + fnvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); - if (!missing_feat_read) { - fnvlist_add_boolean(spa->spa_load_info, - ZPOOL_CONFIG_CAN_RDONLY); - } + if (!nvlist_empty(unsup_feat)) { + fnvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); + } - /* - * If the state is SPA_LOAD_TRYIMPORT, our objective is - * twofold: to determine whether the pool is available for - * import in read-write mode and (if it is not) whether the - * pool is 
available for import in read-only mode. If the pool - * is available for import in read-write mode, it is displayed - * as available in userland; if it is not available for import - * in read-only mode, it is displayed as unavailable in - * userland. If the pool is available for import in read-only - * mode but not read-write mode, it is displayed as unavailable - * in userland with a special note that the pool is actually - * available for open in read-only mode. - * - * As a result, if the state is SPA_LOAD_TRYIMPORT and we are - * missing a feature for write, we must first determine whether - * the pool can be opened read-only before returning to - * userland in order to know whether to display the - * abovementioned note. - */ - if (missing_feat_read || (*missing_feat_writep && - spa_writeable(spa))) { - spa_load_failed(spa, "pool uses unsupported features"); - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); - } + fnvlist_free(enabled_feat); + fnvlist_free(unsup_feat); - /* - * Load refcounts for ZFS features from disk into an in-memory - * cache during SPA initialization. 
- */ - for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { - uint64_t refcount; - - error = feature_get_refcount_from_disk(spa, - &spa_feature_table[i], &refcount); - if (error == 0) { - spa->spa_feat_refcount_cache[i] = refcount; - } else if (error == ENOTSUP) { - spa->spa_feat_refcount_cache[i] = - SPA_FEATURE_DISABLED; - } else { - spa_load_failed(spa, "error getting refcount " - "for feature %s [error=%d]", - spa_feature_table[i].fi_guid, error); - return (spa_vdev_err(rvd, - VDEV_AUX_CORRUPT_DATA, EIO)); - } - } + if (!missing_feat_read) { + fnvlist_add_boolean(spa->spa_load_info, + ZPOOL_CONFIG_CAN_RDONLY); } - if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { - if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, - &spa->spa_feat_enabled_txg_obj, B_TRUE) != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* + * If the state is SPA_LOAD_TRYIMPORT, our objective is + * twofold: to determine whether the pool is available for + * import in read-write mode and (if it is not) whether the + * pool is available for import in read-only mode. If the pool + * is available for import in read-write mode, it is displayed + * as available in userland; if it is not available for import + * in read-only mode, it is displayed as unavailable in + * userland. If the pool is available for import in read-only + * mode but not read-write mode, it is displayed as unavailable + * in userland with a special note that the pool is actually + * available for open in read-only mode. + * + * As a result, if the state is SPA_LOAD_TRYIMPORT and we are + * missing a feature for write, we must first determine whether + * the pool can be opened read-only before returning to + * userland in order to know whether to display the + * abovementioned note. 
+ */ + if (missing_feat_read || (*missing_feat_writep && + spa_writeable(spa))) { + spa_load_failed(spa, "pool uses unsupported features"); + return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, + ENOTSUP)); } /* - * Encryption was added before bookmark_v2, even though bookmark_v2 - * is now a dependency. If this pool has encryption enabled without - * bookmark_v2, trigger an errata message. + * Load refcounts for ZFS features from disk into an in-memory + * cache during SPA initialization. */ - if (spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) && - !spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) { - spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; + for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { + uint64_t refcount; + + error = feature_get_refcount_from_disk(spa, + &spa_feature_table[i], &refcount); + if (error == 0) { + spa->spa_feat_refcount_cache[i] = refcount; + } else if (error == ENOTSUP) { + spa->spa_feat_refcount_cache[i] = + SPA_FEATURE_DISABLED; + } else { + spa_load_failed(spa, "error getting refcount " + "for feature %s [error=%d]", + spa_feature_table[i].fi_guid, error); + return (spa_vdev_err(rvd, + VDEV_AUX_CORRUPT_DATA, EIO)); + } } +} - return (0); +if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { + if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, + &spa->spa_feat_enabled_txg_obj, B_TRUE) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} + +/* + * Encryption was added before bookmark_v2, even though bookmark_v2 + * is now a dependency. If this pool has encryption enabled without + * bookmark_v2, trigger an errata message. 
+ */ +if (spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) && + !spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) { + spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; +} + +return (0); } static int spa_ld_load_special_directories(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - spa->spa_is_initializing = B_TRUE; - error = dsl_pool_open(spa->spa_dsl_pool); - spa->spa_is_initializing = B_FALSE; - if (error != 0) { - spa_load_failed(spa, "dsl_pool_open failed [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +spa->spa_is_initializing = B_TRUE; +error = dsl_pool_open(spa->spa_dsl_pool); +spa->spa_is_initializing = B_FALSE; +if (error != 0) { + spa_load_failed(spa, "dsl_pool_open failed [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - return (0); +return (0); } static int spa_ld_get_props(spa_t *spa) { - int error = 0; - uint64_t obj; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +uint64_t obj; +vdev_t *rvd = spa->spa_root_vdev; + +/* Grab the checksum salt from the MOS. */ +error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_CHECKSUM_SALT, 1, + sizeof (spa->spa_cksum_salt.zcs_bytes), + spa->spa_cksum_salt.zcs_bytes); +if (error == ENOENT) { + /* Generate a new salt for subsequent use */ + (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, + sizeof (spa->spa_cksum_salt.zcs_bytes)); +} else if (error != 0) { + spa_load_failed(spa, "unable to retrieve checksum salt from " + "MOS [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - /* Grab the checksum salt from the MOS. 
*/ - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_CHECKSUM_SALT, 1, - sizeof (spa->spa_cksum_salt.zcs_bytes), - spa->spa_cksum_salt.zcs_bytes); - if (error == ENOENT) { - /* Generate a new salt for subsequent use */ - (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, - sizeof (spa->spa_cksum_salt.zcs_bytes)); - } else if (error != 0) { - spa_load_failed(spa, "unable to retrieve checksum salt from " - "MOS [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj, B_TRUE) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); +if (error != 0) { + spa_load_failed(spa, "error opening deferred-frees bpobj " + "[error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj, B_TRUE) != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); - if (error != 0) { - spa_load_failed(spa, "error opening deferred-frees bpobj " - "[error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +/* + * Load the bit that tells us to use the new accounting function + * (raid-z deflation). If we have an older pool, this will not + * be present. + */ +error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - /* - * Load the bit that tells us to use the new accounting function - * (raid-z deflation). If we have an older pool, this will not - * be present. 
- */ - error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, + &spa->spa_creation_version, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, - &spa->spa_creation_version, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +/* + * Load the persistent error log. If we have an older pool, this will + * not be present. + */ +error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last, + B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - /* - * Load the persistent error log. If we have an older pool, this will - * not be present. - */ - error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last, - B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, + &spa->spa_errlog_scrub, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, - &spa->spa_errlog_scrub, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +/* + * Load the history object. If we have an older pool, this + * will not be present. + */ +error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + +/* + * Load the per-vdev ZAP map. If we have an older pool, this will not + * be present; in this case, defer its creation to a later time to + * avoid dirtying the MOS this early / out of sync context. See + * spa_sync_config_object. 
+ */ + +/* The sentinel is only available in the MOS config. */ +nvlist_t *mos_config; +if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) { + spa_load_failed(spa, "unable to retrieve MOS config"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} + +error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, + &spa->spa_all_vdev_zaps, B_FALSE); +if (error == ENOENT) { + VERIFY(!nvlist_exists(mos_config, + ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); + spa->spa_avz_action = AVZ_ACTION_INITIALIZE; + ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); +} else if (error != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { /* - * Load the history object. If we have an older pool, this - * will not be present. + * An older version of ZFS overwrote the sentinel value, so + * we have orphaned per-vdev ZAPs in the MOS. Defer their + * destruction to later; see spa_sync_config_object. */ - error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - + spa->spa_avz_action = AVZ_ACTION_DESTROY; /* - * Load the per-vdev ZAP map. If we have an older pool, this will not - * be present; in this case, defer its creation to a later time to - * avoid dirtying the MOS this early / out of sync context. See - * spa_sync_config_object. + * We're assuming that no vdevs have had their ZAPs created + * before this. Better be sure of it. */ + ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); +} +nvlist_free(mos_config); - /* The sentinel is only available in the MOS config. 
*/ - nvlist_t *mos_config; - if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) { - spa_load_failed(spa, "unable to retrieve MOS config"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); - error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, - &spa->spa_all_vdev_zaps, B_FALSE); +error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object, + B_FALSE); +if (error && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (error == ENOENT) { - VERIFY(!nvlist_exists(mos_config, - ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); - spa->spa_avz_action = AVZ_ACTION_INITIALIZE; - ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); - } else if (error != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { - /* - * An older version of ZFS overwrote the sentinel value, so - * we have orphaned per-vdev ZAPs in the MOS. Defer their - * destruction to later; see spa_sync_config_object. - */ - spa->spa_avz_action = AVZ_ACTION_DESTROY; - /* - * We're assuming that no vdevs have had their ZAPs created - * before this. Better be sure of it. 
- */ - ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); - } - nvlist_free(mos_config); +if (error == 0) { + uint64_t autoreplace; - spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); + spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); + spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); + spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); + spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); + spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); + spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, + &spa->spa_dedup_ditto); + spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim); + spa->spa_autoreplace = (autoreplace != 0); +} - error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object, - B_FALSE); - if (error && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +/* + * If we are importing a pool with missing top-level vdevs, + * we enforce that the pool doesn't panic or get suspended on + * error since the likelihood of missing data is extremely high. 
+ */ +if (spa->spa_missing_tvds > 0 && + spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE && + spa->spa_load_state != SPA_LOAD_TRYIMPORT) { + spa_load_note(spa, "forcing failmode to 'continue' " + "as some top level vdevs are missing"); + spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE; +} - if (error == 0) { - uint64_t autoreplace; +return (0); +} - spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); - spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); - spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); - spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); - spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); - spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); - spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, - &spa->spa_dedup_ditto); - spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim); - spa->spa_autoreplace = (autoreplace != 0); - } +static int +spa_ld_open_aux_vdevs(spa_t *spa, spa_import_type_t type) +{ +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * If we are importing a pool with missing top-level vdevs, - * we enforce that the pool doesn't panic or get suspended on - * error since the likelihood of missing data is extremely high. - */ - if (spa->spa_missing_tvds > 0 && - spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE && - spa->spa_load_state != SPA_LOAD_TRYIMPORT) { - spa_load_note(spa, "forcing failmode to 'continue' " - "as some top level vdevs are missing"); - spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE; +/* + * If we're assembling the pool from the split-off vdevs of + * an existing pool, we don't want to attach the spares & cache + * devices. + */ + +/* + * Load any hot spares for this pool. 
+ */ +error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object, + B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); + if (load_nvlist(spa, spa->spa_spares.sav_object, + &spa->spa_spares.sav_config) != 0) { + spa_load_failed(spa, "error loading spares nvlist"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } - return (0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); +} else if (error == 0) { + spa->spa_spares.sav_sync = B_TRUE; } -static int -spa_ld_open_aux_vdevs(spa_t *spa, spa_import_type_t type) -{ - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; - - /* - * If we're assembling the pool from the split-off vdevs of - * an existing pool, we don't want to attach the spares & cache - * devices. - */ - - /* - * Load any hot spares for this pool. - */ - error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object, - B_FALSE); - if (error != 0 && error != ENOENT) +/* + * Load any level 2 ARC devices for this pool. 
+ */ +error = spa_dir_prop(spa, DMU_POOL_L2CACHE, + &spa->spa_l2cache.sav_object, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); + if (load_nvlist(spa, spa->spa_l2cache.sav_object, + &spa->spa_l2cache.sav_config) != 0) { + spa_load_failed(spa, "error loading l2cache nvlist"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { - ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); - if (load_nvlist(spa, spa->spa_spares.sav_object, - &spa->spa_spares.sav_config) != 0) { - spa_load_failed(spa, "error loading spares nvlist"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_spares(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - } else if (error == 0) { - spa->spa_spares.sav_sync = B_TRUE; } - /* - * Load any level 2 ARC devices for this pool. 
- */ - error = spa_dir_prop(spa, DMU_POOL_L2CACHE, - &spa->spa_l2cache.sav_object, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { - ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); - if (load_nvlist(spa, spa->spa_l2cache.sav_object, - &spa->spa_l2cache.sav_config) != 0) { - spa_load_failed(spa, "error loading l2cache nvlist"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_l2cache(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - } else if (error == 0) { - spa->spa_l2cache.sav_sync = B_TRUE; - } + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); +} else if (error == 0) { + spa->spa_l2cache.sav_sync = B_TRUE; +} - return (0); +return (0); } static int spa_ld_load_vdev_metadata(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * If the 'multihost' property is set, then never allow a pool to - * be imported when the system hostid is zero. The exception to - * this rule is zdb which is always allowed to access pools. - */ - if (spa_multihost(spa) && spa_get_hostid(spa) == 0 && - (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); - return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); - } +/* + * If the 'multihost' property is set, then never allow a pool to + * be imported when the system hostid is zero. The exception to + * this rule is zdb which is always allowed to access pools. 
+ */ +if (spa_multihost(spa) && spa_get_hostid(spa) == 0 && + (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); + return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); +} +/* + * If the 'autoreplace' property is set, then post a resource notifying + * the ZFS DE that it should not issue any faults for unopenable + * devices. We also iterate over the vdevs, and post a sysevent for any + * unopenable vdevs so that the normal autoreplace handler can take + * over. + */ +if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) { + spa_check_removed(spa->spa_root_vdev); /* - * If the 'autoreplace' property is set, then post a resource notifying - * the ZFS DE that it should not issue any faults for unopenable - * devices. We also iterate over the vdevs, and post a sysevent for any - * unopenable vdevs so that the normal autoreplace handler can take - * over. + * For the import case, this is done in spa_import(), because + * at this point we're using the spare definitions from + * the MOS config, not necessarily from the userland config. */ - if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) { - spa_check_removed(spa->spa_root_vdev); - /* - * For the import case, this is done in spa_import(), because - * at this point we're using the spare definitions from - * the MOS config, not necessarily from the userland config. - */ - if (spa->spa_load_state != SPA_LOAD_IMPORT) { - spa_aux_check_removed(&spa->spa_spares); - spa_aux_check_removed(&spa->spa_l2cache); - } + if (spa->spa_load_state != SPA_LOAD_IMPORT) { + spa_aux_check_removed(&spa->spa_spares); + spa_aux_check_removed(&spa->spa_l2cache); } +} - /* - * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc. 
- */ - error = vdev_load(rvd); - if (error != 0) { - spa_load_failed(spa, "vdev_load failed [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); - } +/* + * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc. + */ +error = vdev_load(rvd); +if (error != 0) { + spa_load_failed(spa, "vdev_load failed [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); +} - /* - * Propagate the leaf DTLs we just loaded all the way up the vdev tree. - */ - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - vdev_dtl_reassess(rvd, 0, 0, B_FALSE); - spa_config_exit(spa, SCL_ALL, FTAG); +/* + * Propagate the leaf DTLs we just loaded all the way up the vdev tree. + */ +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +vdev_dtl_reassess(rvd, 0, 0, B_FALSE); +spa_config_exit(spa, SCL_ALL, FTAG); - return (0); +return (0); } static int spa_ld_load_dedup_tables(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - error = ddt_load(spa); - if (error != 0) { - spa_load_failed(spa, "ddt_load failed [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +error = ddt_load(spa); +if (error != 0) { + spa_load_failed(spa, "ddt_load failed [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - return (0); +return (0); } static int spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, char **ereport) { - vdev_t *rvd = spa->spa_root_vdev; +vdev_t *rvd = spa->spa_root_vdev; - if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa)) { - boolean_t missing = spa_check_logs(spa); - if (missing) { - if (spa->spa_missing_tvds != 0) { - spa_load_note(spa, "spa_check_logs failed " - "so dropping the logs"); - } else { - *ereport = FM_EREPORT_ZFS_LOG_REPLAY; - spa_load_failed(spa, "spa_check_logs failed"); - return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, - ENXIO)); - } +if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa)) { 
+ boolean_t missing = spa_check_logs(spa); + if (missing) { + if (spa->spa_missing_tvds != 0) { + spa_load_note(spa, "spa_check_logs failed " + "so dropping the logs"); + } else { + *ereport = FM_EREPORT_ZFS_LOG_REPLAY; + spa_load_failed(spa, "spa_check_logs failed"); + return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, + ENXIO)); } } +} - return (0); +return (0); } static int spa_ld_verify_pool_data(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * We've successfully opened the pool, verify that we're ready - * to start pushing transactions. - */ - if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) { - error = spa_load_verify(spa); - if (error != 0) { - spa_load_failed(spa, "spa_load_verify failed " - "[error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - error)); - } +/* + * We've successfully opened the pool, verify that we're ready + * to start pushing transactions. + */ +if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) { + error = spa_load_verify(spa); + if (error != 0) { + spa_load_failed(spa, "spa_load_verify failed " + "[error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + error)); } +} - return (0); +return (0); } static void spa_ld_claim_log_blocks(spa_t *spa) { - dmu_tx_t *tx; - dsl_pool_t *dp = spa_get_dsl(spa); +dmu_tx_t *tx; +dsl_pool_t *dp = spa_get_dsl(spa); - /* - * Claim log blocks that haven't been committed yet. - * This must all happen in a single txg. - * Note: spa_claim_max_txg is updated by spa_claim_notify(), - * invoked from zil_claim_log_block()'s i/o done callback. - * Price of rollback is that we abandon the log. - */ - spa->spa_claiming = B_TRUE; +/* + * Claim log blocks that haven't been committed yet. + * This must all happen in a single txg. + * Note: spa_claim_max_txg is updated by spa_claim_notify(), + * invoked from zil_claim_log_block()'s i/o done callback. + * Price of rollback is that we abandon the log. 
+ */ +spa->spa_claiming = B_TRUE; - tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); - (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, - zil_claim, tx, DS_FIND_CHILDREN); - dmu_tx_commit(tx); +tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); +(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_claim, tx, DS_FIND_CHILDREN); +dmu_tx_commit(tx); - spa->spa_claiming = B_FALSE; +spa->spa_claiming = B_FALSE; - spa_set_log_state(spa, SPA_LOG_GOOD); +spa_set_log_state(spa, SPA_LOG_GOOD); } static void spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg, - boolean_t update_config_cache) +boolean_t update_config_cache) { - vdev_t *rvd = spa->spa_root_vdev; - int need_update = B_FALSE; +vdev_t *rvd = spa->spa_root_vdev; +int need_update = B_FALSE; - /* - * If the config cache is stale, or we have uninitialized - * metaslabs (see spa_vdev_add()), then update the config. - * - * If this is a verbatim import, trust the current - * in-core spa_config and update the disk labels. - */ - if (update_config_cache || config_cache_txg != spa->spa_config_txg || - spa->spa_load_state == SPA_LOAD_IMPORT || - spa->spa_load_state == SPA_LOAD_RECOVER || - (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) +/* + * If the config cache is stale, or we have uninitialized + * metaslabs (see spa_vdev_add()), then update the config. + * + * If this is a verbatim import, trust the current + * in-core spa_config and update the disk labels. 
+ */ +if (update_config_cache || config_cache_txg != spa->spa_config_txg || + spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_RECOVER || + (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) + need_update = B_TRUE; + +for (int c = 0; c < rvd->vdev_children; c++) + if (rvd->vdev_child[c]->vdev_ms_array == 0) need_update = B_TRUE; - for (int c = 0; c < rvd->vdev_children; c++) - if (rvd->vdev_child[c]->vdev_ms_array == 0) - need_update = B_TRUE; - - /* - * Update the config cache asynchronously in case we're the - * root pool, in which case the config cache isn't writable yet. - */ - if (need_update) - spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); +/* + * Update the config cache asynchronously in case we're the + * root pool, in which case the config cache isn't writable yet. + */ +if (need_update) + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } static void spa_ld_prepare_for_reload(spa_t *spa) { - int mode = spa->spa_mode; - int async_suspended = spa->spa_async_suspended; +int mode = spa->spa_mode; +int async_suspended = spa->spa_async_suspended; - spa_unload(spa); - spa_deactivate(spa); - spa_activate(spa, mode); +spa_unload(spa); +spa_deactivate(spa); +spa_activate(spa, mode); - /* - * We save the value of spa_async_suspended as it gets reset to 0 by - * spa_unload(). We want to restore it back to the original value before - * returning as we might be calling spa_async_resume() later. - */ - spa->spa_async_suspended = async_suspended; +/* + * We save the value of spa_async_suspended as it gets reset to 0 by + * spa_unload(). We want to restore it back to the original value before + * returning as we might be calling spa_async_resume() later. 
+ */ +spa->spa_async_suspended = async_suspended; } static int spa_ld_read_checkpoint_txg(spa_t *spa) { - uberblock_t checkpoint; - int error = 0; +uberblock_t checkpoint; +int error = 0; - ASSERT0(spa->spa_checkpoint_txg); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT0(spa->spa_checkpoint_txg); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), - sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); +error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), + sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); - if (error == ENOENT) - return (0); +if (error == ENOENT) + return (0); - if (error != 0) - return (error); +if (error != 0) + return (error); - ASSERT3U(checkpoint.ub_txg, !=, 0); - ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0); - ASSERT3U(checkpoint.ub_timestamp, !=, 0); - spa->spa_checkpoint_txg = checkpoint.ub_txg; - spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; +ASSERT3U(checkpoint.ub_txg, !=, 0); +ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0); +ASSERT3U(checkpoint.ub_timestamp, !=, 0); +spa->spa_checkpoint_txg = checkpoint.ub_txg; +spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; - return (0); +return (0); } static int spa_ld_mos_init(spa_t *spa, spa_import_type_t type) { - int error = 0; - - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); +int error = 0; - /* - * Never trust the config that is provided unless we are assembling - * a pool following a split. - * This means don't trust blkptrs and the vdev tree in general. This - * also effectively puts the spa in read-only mode since - * spa_writeable() checks for spa_trust_config to be true. - * We will later load a trusted config from the MOS. 
- */ - if (type != SPA_IMPORT_ASSEMBLE) - spa->spa_trust_config = B_FALSE; +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); - /* - * Parse the config provided to create a vdev tree. - */ - error = spa_ld_parse_config(spa, type); - if (error != 0) - return (error); +/* + * Never trust the config that is provided unless we are assembling + * a pool following a split. + * This means don't trust blkptrs and the vdev tree in general. This + * also effectively puts the spa in read-only mode since + * spa_writeable() checks for spa_trust_config to be true. + * We will later load a trusted config from the MOS. + */ +if (type != SPA_IMPORT_ASSEMBLE) + spa->spa_trust_config = B_FALSE; - spa_import_progress_add(spa); +/* + * Parse the config provided to create a vdev tree. + */ +error = spa_ld_parse_config(spa, type); +if (error != 0) + return (error); - /* - * Now that we have the vdev tree, try to open each vdev. This involves - * opening the underlying physical device, retrieving its geometry and - * probing the vdev with a dummy I/O. The state of each vdev will be set - * based on the success of those operations. After this we'll be ready - * to read from the vdevs. - */ - error = spa_ld_open_vdevs(spa); - if (error != 0) - return (error); +spa_import_progress_add(spa); - /* - * Read the label of each vdev and make sure that the GUIDs stored - * there match the GUIDs in the config provided. - * If we're assembling a new pool that's been split off from an - * existing pool, the labels haven't yet been updated so we skip - * validation for now. - */ - if (type != SPA_IMPORT_ASSEMBLE) { - error = spa_ld_validate_vdevs(spa); - if (error != 0) - return (error); - } +/* + * Now that we have the vdev tree, try to open each vdev. This involves + * opening the underlying physical device, retrieving its geometry and + * probing the vdev with a dummy I/O. The state of each vdev will be set + * based on the success of those operations. 
After this we'll be ready + * to read from the vdevs. + */ +error = spa_ld_open_vdevs(spa); +if (error != 0) + return (error); - /* - * Read all vdev labels to find the best uberblock (i.e. latest, - * unless spa_load_max_txg is set) and store it in spa_uberblock. We - * get the list of features required to read blkptrs in the MOS from - * the vdev label with the best uberblock and verify that our version - * of zfs supports them all. - */ - error = spa_ld_select_uberblock(spa, type); +/* + * Read the label of each vdev and make sure that the GUIDs stored + * there match the GUIDs in the config provided. + * If we're assembling a new pool that's been split off from an + * existing pool, the labels haven't yet been updated so we skip + * validation for now. + */ +if (type != SPA_IMPORT_ASSEMBLE) { + error = spa_ld_validate_vdevs(spa); if (error != 0) return (error); +} - /* - * Pass that uberblock to the dsl_pool layer which will open the root - * blkptr. This blkptr points to the latest version of the MOS and will - * allow us to read its contents. - */ - error = spa_ld_open_rootbp(spa); - if (error != 0) - return (error); +/* + * Read all vdev labels to find the best uberblock (i.e. latest, + * unless spa_load_max_txg is set) and store it in spa_uberblock. We + * get the list of features required to read blkptrs in the MOS from + * the vdev label with the best uberblock and verify that our version + * of zfs supports them all. + */ +error = spa_ld_select_uberblock(spa, type); +if (error != 0) + return (error); - return (0); +/* + * Pass that uberblock to the dsl_pool layer which will open the root + * blkptr. This blkptr points to the latest version of the MOS and will + * allow us to read its contents. 
+ */ +error = spa_ld_open_rootbp(spa); +if (error != 0) + return (error); + +return (0); } static int spa_ld_checkpoint_rewind(spa_t *spa) { - uberblock_t checkpoint; - int error = 0; +uberblock_t checkpoint; +int error = 0; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), - sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); +error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), + sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); - if (error != 0) { - spa_load_failed(spa, "unable to retrieve checkpointed " - "uberblock from the MOS config [error=%d]", error); +if (error != 0) { + spa_load_failed(spa, "unable to retrieve checkpointed " + "uberblock from the MOS config [error=%d]", error); - if (error == ENOENT) - error = ZFS_ERR_NO_CHECKPOINT; + if (error == ENOENT) + error = ZFS_ERR_NO_CHECKPOINT; - return (error); - } + return (error); +} - ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg); - ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg); +ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg); +ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg); - /* - * We need to update the txg and timestamp of the checkpointed - * uberblock to be higher than the latest one. This ensures that - * the checkpointed uberblock is selected if we were to close and - * reopen the pool right after we've written it in the vdev labels. - * (also see block comment in vdev_uberblock_compare) - */ - checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1; - checkpoint.ub_timestamp = gethrestime_sec(); +/* + * We need to update the txg and timestamp of the checkpointed + * uberblock to be higher than the latest one. 
This ensures that + * the checkpointed uberblock is selected if we were to close and + * reopen the pool right after we've written it in the vdev labels. + * (also see block comment in vdev_uberblock_compare) + */ +checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1; +checkpoint.ub_timestamp = gethrestime_sec(); - /* - * Set current uberblock to be the checkpointed uberblock. - */ - spa->spa_uberblock = checkpoint; +/* + * Set current uberblock to be the checkpointed uberblock. + */ +spa->spa_uberblock = checkpoint; - /* - * If we are doing a normal rewind, then the pool is open for - * writing and we sync the "updated" checkpointed uberblock to - * disk. Once this is done, we've basically rewound the whole - * pool and there is no way back. - * - * There are cases when we don't want to attempt and sync the - * checkpointed uberblock to disk because we are opening a - * pool as read-only. Specifically, verifying the checkpointed - * state with zdb, and importing the checkpointed state to get - * a "preview" of its content. - */ - if (spa_writeable(spa)) { - vdev_t *rvd = spa->spa_root_vdev; +/* + * If we are doing a normal rewind, then the pool is open for + * writing and we sync the "updated" checkpointed uberblock to + * disk. Once this is done, we've basically rewound the whole + * pool and there is no way back. + * + * There are cases when we don't want to attempt and sync the + * checkpointed uberblock to disk because we are opening a + * pool as read-only. Specifically, verifying the checkpointed + * state with zdb, and importing the checkpointed state to get + * a "preview" of its content. 
+ */ +if (spa_writeable(spa)) { + vdev_t *rvd = spa->spa_root_vdev; - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; - int svdcount = 0; - int children = rvd->vdev_children; - int c0 = spa_get_random(children); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; + int svdcount = 0; + int children = rvd->vdev_children; + int c0 = spa_get_random(children); - for (int c = 0; c < children; c++) { - vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; + for (int c = 0; c < children; c++) { + vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; - /* Stop when revisiting the first vdev */ - if (c > 0 && svd[0] == vd) - break; + /* Stop when revisiting the first vdev */ + if (c > 0 && svd[0] == vd) + break; - if (vd->vdev_ms_array == 0 || vd->vdev_islog || - !vdev_is_concrete(vd)) - continue; + if (vd->vdev_ms_array == 0 || vd->vdev_islog || + !vdev_is_concrete(vd)) + continue; - svd[svdcount++] = vd; - if (svdcount == SPA_SYNC_MIN_VDEVS) - break; - } - error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); - if (error == 0) - spa->spa_last_synced_guid = rvd->vdev_guid; - spa_config_exit(spa, SCL_ALL, FTAG); + svd[svdcount++] = vd; + if (svdcount == SPA_SYNC_MIN_VDEVS) + break; + } + error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); + if (error == 0) + spa->spa_last_synced_guid = rvd->vdev_guid; + spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0) { - spa_load_failed(spa, "failed to write checkpointed " - "uberblock to the vdev labels [error=%d]", error); - return (error); - } + if (error != 0) { + spa_load_failed(spa, "failed to write checkpointed " + "uberblock to the vdev labels [error=%d]", error); + return (error); } +} - return (0); +return (0); } static int spa_ld_mos_with_trusted_config(spa_t *spa, spa_import_type_t type, - boolean_t *update_config_cache) +boolean_t *update_config_cache) { - int error; +int error; + +/* + * Parse the config for pool, open 
and validate vdevs, + * select an uberblock, and use that uberblock to open + * the MOS. + */ +error = spa_ld_mos_init(spa, type); +if (error != 0) + return (error); + +/* + * Retrieve the trusted config stored in the MOS and use it to create + * a new, exact version of the vdev tree, then reopen all vdevs. + */ +error = spa_ld_trusted_config(spa, type, B_FALSE); +if (error == EAGAIN) { + if (update_config_cache != NULL) + *update_config_cache = B_TRUE; /* - * Parse the config for pool, open and validate vdevs, - * select an uberblock, and use that uberblock to open - * the MOS. + * Redo the loading process with the trusted config if it is + * too different from the untrusted config. */ + spa_ld_prepare_for_reload(spa); + spa_load_note(spa, "RELOADING"); error = spa_ld_mos_init(spa, type); if (error != 0) return (error); - /* - * Retrieve the trusted config stored in the MOS and use it to create - * a new, exact version of the vdev tree, then reopen all vdevs. - */ - error = spa_ld_trusted_config(spa, type, B_FALSE); - if (error == EAGAIN) { - if (update_config_cache != NULL) - *update_config_cache = B_TRUE; - - /* - * Redo the loading process with the trusted config if it is - * too different from the untrusted config. - */ - spa_ld_prepare_for_reload(spa); - spa_load_note(spa, "RELOADING"); - error = spa_ld_mos_init(spa, type); - if (error != 0) - return (error); - - error = spa_ld_trusted_config(spa, type, B_TRUE); - if (error != 0) - return (error); - - } else if (error != 0) { + error = spa_ld_trusted_config(spa, type, B_TRUE); + if (error != 0) return (error); - } - return (0); +} else if (error != 0) { + return (error); +} + +return (0); } /* - * Load an existing storage pool, using the config provided. This config - * describes which vdevs are part of the pool and is later validated against - * partial configs present in each vdev's label and an entire copy of the - * config stored in the MOS. 
- */ +* Load an existing storage pool, using the config provided. This config +* describes which vdevs are part of the pool and is later validated against +* partial configs present in each vdev's label and an entire copy of the +* config stored in the MOS. +*/ static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) { - int error = 0; - boolean_t missing_feat_write = B_FALSE; - boolean_t checkpoint_rewind = - (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); - boolean_t update_config_cache = B_FALSE; +int error = 0; +boolean_t missing_feat_write = B_FALSE; +boolean_t checkpoint_rewind = + (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); +boolean_t update_config_cache = B_FALSE; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); + +spa_load_note(spa, "LOADING"); - spa_load_note(spa, "LOADING"); +error = spa_ld_mos_with_trusted_config(spa, type, &update_config_cache); +if (error != 0) + return (error); + +/* + * If we are rewinding to the checkpoint then we need to repeat + * everything we've done so far in this function but this time + * selecting the checkpointed uberblock and using that to open + * the MOS. + */ +if (checkpoint_rewind) { + /* + * If we are rewinding to the checkpoint update config cache + * anyway. + */ + update_config_cache = B_TRUE; - error = spa_ld_mos_with_trusted_config(spa, type, &update_config_cache); + /* + * Extract the checkpointed uberblock from the current MOS + * and use this as the pool's uberblock from now on. If the + * pool is imported as writeable we also write the checkpoint + * uberblock to the labels, making the rewind permanent. 
+ */ + error = spa_ld_checkpoint_rewind(spa); if (error != 0) return (error); /* - * If we are rewinding to the checkpoint then we need to repeat - * everything we've done so far in this function but this time - * selecting the checkpointed uberblock and using that to open - * the MOS. + * Redo the loading process again with the + * checkpointed uberblock. */ - if (checkpoint_rewind) { - /* - * If we are rewinding to the checkpoint update config cache - * anyway. - */ - update_config_cache = B_TRUE; + spa_ld_prepare_for_reload(spa); + spa_load_note(spa, "LOADING checkpointed uberblock"); + error = spa_ld_mos_with_trusted_config(spa, type, NULL); + if (error != 0) + return (error); +} - /* - * Extract the checkpointed uberblock from the current MOS - * and use this as the pool's uberblock from now on. If the - * pool is imported as writeable we also write the checkpoint - * uberblock to the labels, making the rewind permanent. - */ - error = spa_ld_checkpoint_rewind(spa); - if (error != 0) - return (error); +/* + * Retrieve the checkpoint txg if the pool has a checkpoint. + */ +error = spa_ld_read_checkpoint_txg(spa); +if (error != 0) + return (error); - /* - * Redo the loading process again with the - * checkpointed uberblock. - */ - spa_ld_prepare_for_reload(spa); - spa_load_note(spa, "LOADING checkpointed uberblock"); - error = spa_ld_mos_with_trusted_config(spa, type, NULL); - if (error != 0) - return (error); - } +/* + * Retrieve the mapping of indirect vdevs. Those vdevs were removed + * from the pool and their contents were re-mapped to other vdevs. Note + * that everything that we read before this step must have been + * rewritten on concrete vdevs after the last device removal was + * initiated. Otherwise we could be reading from indirect vdevs before + * we have loaded their mappings. + */ +error = spa_ld_open_indirect_vdev_metadata(spa); +if (error != 0) + return (error); - /* - * Retrieve the checkpoint txg if the pool has a checkpoint. 
- */ - error = spa_ld_read_checkpoint_txg(spa); - if (error != 0) - return (error); +/* + * Retrieve the full list of active features from the MOS and check if + * they are all supported. + */ +error = spa_ld_check_features(spa, &missing_feat_write); +if (error != 0) + return (error); + +/* + * Load several special directories from the MOS needed by the dsl_pool + * layer. + */ +error = spa_ld_load_special_directories(spa); +if (error != 0) + return (error); + +/* + * Retrieve pool properties from the MOS. + */ +error = spa_ld_get_props(spa); +if (error != 0) + return (error); + +/* + * Retrieve the list of auxiliary devices - cache devices and spares - + * and open them. + */ +error = spa_ld_open_aux_vdevs(spa, type); +if (error != 0) + return (error); + +/* + * Load the metadata for all vdevs. Also check if unopenable devices + * should be autoreplaced. + */ +error = spa_ld_load_vdev_metadata(spa); +if (error != 0) + return (error); + +error = spa_ld_load_dedup_tables(spa); +if (error != 0) + return (error); + +/* + * Verify the logs now to make sure we don't have any unexpected errors + * when we claim log blocks later. + */ +error = spa_ld_verify_logs(spa, type, ereport); +if (error != 0) + return (error); + +if (missing_feat_write) { + ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT); /* - * Retrieve the mapping of indirect vdevs. Those vdevs were removed - * from the pool and their contents were re-mapped to other vdevs. Note - * that everything that we read before this step must have been - * rewritten on concrete vdevs after the last device removal was - * initiated. Otherwise we could be reading from indirect vdevs before - * we have loaded their mappings. + * At this point, we know that we can open the pool in + * read-only mode but not read-write mode. We now have enough + * information and can return to userland. 
*/ - error = spa_ld_open_indirect_vdev_metadata(spa); - if (error != 0) - return (error); + return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT, + ENOTSUP)); +} + +/* + * Traverse the last txgs to make sure the pool was left off in a safe + * state. When performing an extreme rewind, we verify the whole pool, + * which can take a very long time. + */ +error = spa_ld_verify_pool_data(spa); +if (error != 0) + return (error); + +/* + * Calculate the deflated space for the pool. This must be done before + * we write anything to the pool because we'd need to update the space + * accounting using the deflated sizes. + */ +spa_update_dspace(spa); + +/* + * We have now retrieved all the information we needed to open the + * pool. If we are importing the pool in read-write mode, a few + * additional steps must be performed to finish the import. + */ +if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || + spa->spa_load_max_txg == UINT64_MAX)) { + uint64_t config_cache_txg = spa->spa_config_txg; + + ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT); /* - * Retrieve the full list of active features from the MOS and check if - * they are all supported. + * In case of a checkpoint rewind, log the original txg + * of the checkpointed uberblock. */ - error = spa_ld_check_features(spa, &missing_feat_write); - if (error != 0) - return (error); + if (checkpoint_rewind) { + spa_history_log_internal(spa, "checkpoint rewind", + NULL, "rewound state to txg=%llu", + (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); + } /* - * Load several special directories from the MOS needed by the dsl_pool - * layer. + * Traverse the ZIL and claim all blocks. */ - error = spa_ld_load_special_directories(spa); - if (error != 0) - return (error); + spa_ld_claim_log_blocks(spa); /* - * Retrieve pool properties from the MOS. + * Kick-off the syncing thread. 
*/ - error = spa_ld_get_props(spa); - if (error != 0) - return (error); + spa->spa_sync_on = B_TRUE; + txg_sync_start(spa->spa_dsl_pool); + mmp_thread_start(spa); /* - * Retrieve the list of auxiliary devices - cache devices and spares - - * and open them. + * Wait for all claims to sync. We sync up to the highest + * claimed log block birth time so that claimed log blocks + * don't appear to be from the future. spa_claim_max_txg + * will have been set for us by ZIL traversal operations + * performed above. */ - error = spa_ld_open_aux_vdevs(spa, type); - if (error != 0) - return (error); + txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* - * Load the metadata for all vdevs. Also check if unopenable devices - * should be autoreplaced. + * Check if we need to request an update of the config. On the + * next sync, we would update the config stored in vdev labels + * and the cachefile (by default /etc/zfs/zpool.cache). */ - error = spa_ld_load_vdev_metadata(spa); - if (error != 0) - return (error); - - error = spa_ld_load_dedup_tables(spa); - if (error != 0) - return (error); + spa_ld_check_for_config_update(spa, config_cache_txg, + update_config_cache); /* - * Verify the logs now to make sure we don't have any unexpected errors - * when we claim log blocks later. + * Check all DTLs to see if anything needs resilvering. */ - error = spa_ld_verify_logs(spa, type, ereport); - if (error != 0) - return (error); - - if (missing_feat_write) { - ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT); - - /* - * At this point, we know that we can open the pool in - * read-only mode but not read-write mode. We now have enough - * information and can return to userland. 
- */ - return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); - } + if (!dsl_scan_resilvering(spa->spa_dsl_pool) && + vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) + spa_async_request(spa, SPA_ASYNC_RESILVER); /* - * Traverse the last txgs to make sure the pool was left off in a safe - * state. When performing an extreme rewind, we verify the whole pool, - * which can take a very long time. + * Log the fact that we booted up (so that we can detect if + * we rebooted in the middle of an operation). */ - error = spa_ld_verify_pool_data(spa); - if (error != 0) - return (error); + spa_history_log_version(spa, "open", NULL); + + spa_restart_removal(spa); + spa_spawn_aux_threads(spa); /* - * Calculate the deflated space for the pool. This must be done before - * we write anything to the pool because we'd need to update the space - * accounting using the deflated sizes. + * Delete any inconsistent datasets. + * + * Note: + * Since we may be issuing deletes for clones here, + * we make sure to do so after we've spawned all the + * auxiliary threads above (from which the livelist + * deletion zthr is part of). */ - spa_update_dspace(spa); + (void) dmu_objset_find(spa_name(spa), + dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); /* - * We have now retrieved all the information we needed to open the - * pool. If we are importing the pool in read-write mode, a few - * additional steps must be performed to finish the import. + * Clean up any stale temporary dataset userrefs. */ - if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || - spa->spa_load_max_txg == UINT64_MAX)) { - uint64_t config_cache_txg = spa->spa_config_txg; - - ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT); + dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); - /* - * In case of a checkpoint rewind, log the original txg - * of the checkpointed uberblock. 
- */ - if (checkpoint_rewind) { - spa_history_log_internal(spa, "checkpoint rewind", - NULL, "rewound state to txg=%llu", - (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); - } - - /* - * Traverse the ZIL and claim all blocks. - */ - spa_ld_claim_log_blocks(spa); - - /* - * Kick-off the syncing thread. - */ - spa->spa_sync_on = B_TRUE; - txg_sync_start(spa->spa_dsl_pool); - mmp_thread_start(spa); - - /* - * Wait for all claims to sync. We sync up to the highest - * claimed log block birth time so that claimed log blocks - * don't appear to be from the future. spa_claim_max_txg - * will have been set for us by ZIL traversal operations - * performed above. - */ - txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); - - /* - * Check if we need to request an update of the config. On the - * next sync, we would update the config stored in vdev labels - * and the cachefile (by default /etc/zfs/zpool.cache). - */ - spa_ld_check_for_config_update(spa, config_cache_txg, - update_config_cache); - - /* - * Check all DTLs to see if anything needs resilvering. - */ - if (!dsl_scan_resilvering(spa->spa_dsl_pool) && - vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - spa_async_request(spa, SPA_ASYNC_RESILVER); - - /* - * Log the fact that we booted up (so that we can detect if - * we rebooted in the middle of an operation). - */ - spa_history_log_version(spa, "open", NULL); - - spa_restart_removal(spa); - spa_spawn_aux_threads(spa); - - /* - * Delete any inconsistent datasets. - * - * Note: - * Since we may be issuing deletes for clones here, - * we make sure to do so after we've spawned all the - * auxiliary threads above (from which the livelist - * deletion zthr is part of). - */ - (void) dmu_objset_find(spa_name(spa), - dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); - - /* - * Clean up any stale temporary dataset userrefs. 
- */ - dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - vdev_initialize_restart(spa->spa_root_vdev); - vdev_trim_restart(spa->spa_root_vdev); - vdev_autotrim_restart(spa); - spa_config_exit(spa, SCL_CONFIG, FTAG); - } + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + vdev_initialize_restart(spa->spa_root_vdev); + vdev_trim_restart(spa->spa_root_vdev); + vdev_autotrim_restart(spa); + spa_config_exit(spa, SCL_CONFIG, FTAG); +} - spa_import_progress_remove(spa_guid(spa)); - spa_load_note(spa, "LOADED"); +spa_import_progress_remove(spa_guid(spa)); +spa_load_note(spa, "LOADED"); - return (0); +return (0); } static int spa_load_retry(spa_t *spa, spa_load_state_t state) { - int mode = spa->spa_mode; +int mode = spa->spa_mode; - spa_unload(spa); - spa_deactivate(spa); +spa_unload(spa); +spa_deactivate(spa); - spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; +spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; - spa_activate(spa, mode); - spa_async_suspend(spa); +spa_activate(spa, mode); +spa_async_suspend(spa); - spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", - (u_longlong_t)spa->spa_load_max_txg); +spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", + (u_longlong_t)spa->spa_load_max_txg); - return (spa_load(spa, state, SPA_IMPORT_EXISTING)); +return (spa_load(spa, state, SPA_IMPORT_EXISTING)); } /* - * If spa_load() fails this function will try loading prior txg's. If - * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool - * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this - * function will not rewind the pool and will return the same error as - * spa_load(). - */ +* If spa_load() fails this function will try loading prior txg's. If +* 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool +* will be rewound to that txg. 
If 'state' is not SPA_LOAD_RECOVER this +* function will not rewind the pool and will return the same error as +* spa_load(). +*/ static int spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request, - int rewind_flags) -{ - nvlist_t *loadinfo = NULL; - nvlist_t *config = NULL; - int load_error, rewind_error; - uint64_t safe_rewind_txg; - uint64_t min_txg; - - if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { - spa->spa_load_max_txg = spa->spa_load_txg; - spa_set_log_state(spa, SPA_LOG_CLEAR); - } else { - spa->spa_load_max_txg = max_request; - if (max_request != UINT64_MAX) - spa->spa_extreme_rewind = B_TRUE; - } - - load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING); - if (load_error == 0) - return (0); - if (load_error == ZFS_ERR_NO_CHECKPOINT) { - /* - * When attempting checkpoint-rewind on a pool with no - * checkpoint, we should not attempt to load uberblocks - * from previous txgs when spa_load fails. - */ - ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); - spa_import_progress_remove(spa_guid(spa)); - return (load_error); - } - - if (spa->spa_root_vdev != NULL) - config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); +int rewind_flags) +{ +nvlist_t *loadinfo = NULL; +nvlist_t *config = NULL; +int load_error, rewind_error; +uint64_t safe_rewind_txg; +uint64_t min_txg; + +if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { + spa->spa_load_max_txg = spa->spa_load_txg; + spa_set_log_state(spa, SPA_LOG_CLEAR); +} else { + spa->spa_load_max_txg = max_request; + if (max_request != UINT64_MAX) + spa->spa_extreme_rewind = B_TRUE; +} - spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; - spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; +load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING); +if (load_error == 0) + return (0); +if (load_error == ZFS_ERR_NO_CHECKPOINT) { + /* + * When attempting checkpoint-rewind on a pool with no + * checkpoint, we should not attempt to load uberblocks + * 
from previous txgs when spa_load fails. + */ + ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); + spa_import_progress_remove(spa_guid(spa)); + return (load_error); +} - if (rewind_flags & ZPOOL_NEVER_REWIND) { - nvlist_free(config); - spa_import_progress_remove(spa_guid(spa)); - return (load_error); - } +if (spa->spa_root_vdev != NULL) + config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); - if (state == SPA_LOAD_RECOVER) { - /* Price of rolling back is discarding txgs, including log */ - spa_set_log_state(spa, SPA_LOG_CLEAR); - } else { - /* - * If we aren't rolling back save the load info from our first - * import attempt so that we can restore it after attempting - * to rewind. - */ - loadinfo = spa->spa_load_info; - spa->spa_load_info = fnvlist_alloc(); - } +spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; +spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; - spa->spa_load_max_txg = spa->spa_last_ubsync_txg; - safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; - min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? - TXG_INITIAL : safe_rewind_txg; +if (rewind_flags & ZPOOL_NEVER_REWIND) { + nvlist_free(config); + spa_import_progress_remove(spa_guid(spa)); + return (load_error); +} +if (state == SPA_LOAD_RECOVER) { + /* Price of rolling back is discarding txgs, including log */ + spa_set_log_state(spa, SPA_LOG_CLEAR); +} else { /* - * Continue as long as we're finding errors, we're still within - * the acceptable rewind range, and we're still finding uberblocks + * If we aren't rolling back save the load info from our first + * import attempt so that we can restore it after attempting + * to rewind. 
*/ - while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && - spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { - if (spa->spa_load_max_txg < safe_rewind_txg) - spa->spa_extreme_rewind = B_TRUE; - rewind_error = spa_load_retry(spa, state); - } + loadinfo = spa->spa_load_info; + spa->spa_load_info = fnvlist_alloc(); +} - spa->spa_extreme_rewind = B_FALSE; - spa->spa_load_max_txg = UINT64_MAX; +spa->spa_load_max_txg = spa->spa_last_ubsync_txg; +safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; +min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? + TXG_INITIAL : safe_rewind_txg; - if (config && (rewind_error || state != SPA_LOAD_RECOVER)) - spa_config_set(spa, config); - else - nvlist_free(config); +/* + * Continue as long as we're finding errors, we're still within + * the acceptable rewind range, and we're still finding uberblocks + */ +while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && + spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { + if (spa->spa_load_max_txg < safe_rewind_txg) + spa->spa_extreme_rewind = B_TRUE; + rewind_error = spa_load_retry(spa, state); +} - if (state == SPA_LOAD_RECOVER) { - ASSERT3P(loadinfo, ==, NULL); - spa_import_progress_remove(spa_guid(spa)); - return (rewind_error); - } else { - /* Store the rewind info as part of the initial load info */ - fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, - spa->spa_load_info); +spa->spa_extreme_rewind = B_FALSE; +spa->spa_load_max_txg = UINT64_MAX; - /* Restore the initial load info */ - fnvlist_free(spa->spa_load_info); - spa->spa_load_info = loadinfo; +if (config && (rewind_error || state != SPA_LOAD_RECOVER)) + spa_config_set(spa, config); +else + nvlist_free(config); - spa_import_progress_remove(spa_guid(spa)); - return (load_error); - } +if (state == SPA_LOAD_RECOVER) { + ASSERT3P(loadinfo, ==, NULL); + spa_import_progress_remove(spa_guid(spa)); + return (rewind_error); +} else { + /* Store the rewind info as part of the initial load info */ + 
fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, + spa->spa_load_info); + + /* Restore the initial load info */ + fnvlist_free(spa->spa_load_info); + spa->spa_load_info = loadinfo; + + spa_import_progress_remove(spa_guid(spa)); + return (load_error); +} } /* - * Pool Open/Import - * - * The import case is identical to an open except that the configuration is sent - * down from userland, instead of grabbed from the configuration cache. For the - * case of an open, the pool configuration will exist in the - * POOL_STATE_UNINITIALIZED state. - * - * The stats information (gen/count/ustats) is used to gather vdev statistics at - * the same time open the pool, without having to keep around the spa_t in some - * ambiguous state. - */ +* Pool Open/Import +* +* The import case is identical to an open except that the configuration is sent +* down from userland, instead of grabbed from the configuration cache. For the +* case of an open, the pool configuration will exist in the +* POOL_STATE_UNINITIALIZED state. +* +* The stats information (gen/count/ustats) is used to gather vdev statistics at +* the same time open the pool, without having to keep around the spa_t in some +* ambiguous state. +*/ static int spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, - nvlist_t **config) +nvlist_t **config) { - spa_t *spa; - spa_load_state_t state = SPA_LOAD_OPEN; - int error; - int locked = B_FALSE; - int firstopen = B_FALSE; +spa_t *spa; +spa_load_state_t state = SPA_LOAD_OPEN; +int error; +int locked = B_FALSE; +int firstopen = B_FALSE; - *spapp = NULL; +*spapp = NULL; - /* - * As disgusting as this is, we need to support recursive calls to this - * function because dsl_dir_open() is called during spa_load(), and ends - * up calling spa_open() again. The real fix is to figure out how to - * avoid dsl_dir_open() calling this in the first place. 
- */ - if (MUTEX_NOT_HELD(&spa_namespace_lock)) { - mutex_enter(&spa_namespace_lock); - locked = B_TRUE; - } +/* + * As disgusting as this is, we need to support recursive calls to this + * function because dsl_dir_open() is called during spa_load(), and ends + * up calling spa_open() again. The real fix is to figure out how to + * avoid dsl_dir_open() calling this in the first place. + */ +if (MUTEX_NOT_HELD(&spa_namespace_lock)) { + mutex_enter(&spa_namespace_lock); + locked = B_TRUE; +} - if ((spa = spa_lookup(pool)) == NULL) { - if (locked) - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOENT)); - } +if ((spa = spa_lookup(pool)) == NULL) { + if (locked) + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOENT)); +} - if (spa->spa_state == POOL_STATE_UNINITIALIZED) { - zpool_load_policy_t policy; +if (spa->spa_state == POOL_STATE_UNINITIALIZED) { + zpool_load_policy_t policy; - firstopen = B_TRUE; + firstopen = B_TRUE; - zpool_get_load_policy(nvpolicy ? nvpolicy : spa->spa_config, - &policy); - if (policy.zlp_rewind & ZPOOL_DO_REWIND) - state = SPA_LOAD_RECOVER; + zpool_get_load_policy(nvpolicy ? 
nvpolicy : spa->spa_config, + &policy); + if (policy.zlp_rewind & ZPOOL_DO_REWIND) + state = SPA_LOAD_RECOVER; - spa_activate(spa, spa_mode_global); + spa_activate(spa, spa_mode_global); - if (state != SPA_LOAD_RECOVER) - spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; - spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; + if (state != SPA_LOAD_RECOVER) + spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; + spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; - zfs_dbgmsg("spa_open_common: opening %s", pool); - error = spa_load_best(spa, state, policy.zlp_txg, - policy.zlp_rewind); + zfs_dbgmsg("spa_open_common: opening %s", pool); + error = spa_load_best(spa, state, policy.zlp_txg, + policy.zlp_rewind); - if (error == EBADF) { - /* - * If vdev_validate() returns failure (indicated by - * EBADF), it indicates that one of the vdevs indicates - * that the pool has been exported or destroyed. If - * this is the case, the config cache is out of sync and - * we should remove the pool from the namespace. - */ - spa_unload(spa); - spa_deactivate(spa); - spa_write_cachefile(spa, B_TRUE, B_TRUE); - spa_remove(spa); - if (locked) - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOENT)); - } + if (error == EBADF) { + /* + * If vdev_validate() returns failure (indicated by + * EBADF), it indicates that one of the vdevs indicates + * that the pool has been exported or destroyed. If + * this is the case, the config cache is out of sync and + * we should remove the pool from the namespace. + */ + spa_unload(spa); + spa_deactivate(spa); + spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_remove(spa); + if (locked) + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOENT)); + } - if (error) { - /* - * We can't open the pool, but we still have useful - * information: the state of each vdev after the - * attempted vdev_open(). Return this to the user. 
- */ - if (config != NULL && spa->spa_config) { - VERIFY(nvlist_dup(spa->spa_config, config, - KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist(*config, - ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); - } - spa_unload(spa); - spa_deactivate(spa); - spa->spa_last_open_failed = error; - if (locked) - mutex_exit(&spa_namespace_lock); - *spapp = NULL; - return (error); + if (error) { + /* + * We can't open the pool, but we still have useful + * information: the state of each vdev after the + * attempted vdev_open(). Return this to the user. + */ + if (config != NULL && spa->spa_config) { + VERIFY(nvlist_dup(spa->spa_config, config, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist(*config, + ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); } + spa_unload(spa); + spa_deactivate(spa); + spa->spa_last_open_failed = error; + if (locked) + mutex_exit(&spa_namespace_lock); + *spapp = NULL; + return (error); } +} - spa_open_ref(spa, tag); +spa_open_ref(spa, tag); - if (config != NULL) - *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); +if (config != NULL) + *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); - /* - * If we've recovered the pool, pass back any information we - * gathered while doing the load. - */ - if (state == SPA_LOAD_RECOVER) { - VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); - } +/* + * If we've recovered the pool, pass back any information we + * gathered while doing the load. 
+ */ +if (state == SPA_LOAD_RECOVER) { + VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); +} - if (locked) { - spa->spa_last_open_failed = 0; - spa->spa_last_ubsync_txg = 0; - spa->spa_load_txg = 0; - mutex_exit(&spa_namespace_lock); - } +if (locked) { + spa->spa_last_open_failed = 0; + spa->spa_last_ubsync_txg = 0; + spa->spa_load_txg = 0; + mutex_exit(&spa_namespace_lock); +} - if (firstopen) - zvol_create_minors(spa, spa_name(spa), B_TRUE); +if (firstopen) + zvol_create_minors(spa, spa_name(spa), B_TRUE); - *spapp = spa; +*spapp = spa; - return (0); +return (0); } int spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, - nvlist_t **config) +nvlist_t **config) { - return (spa_open_common(name, spapp, tag, policy, config)); +return (spa_open_common(name, spapp, tag, policy, config)); } int spa_open(const char *name, spa_t **spapp, void *tag) { - return (spa_open_common(name, spapp, tag, NULL, NULL)); +return (spa_open_common(name, spapp, tag, NULL, NULL)); } /* - * Lookup the given spa_t, incrementing the inject count in the process, - * preventing it from being exported or destroyed. - */ +* Lookup the given spa_t, incrementing the inject count in the process, +* preventing it from being exported or destroyed. 
+*/ spa_t * spa_inject_addref(char *name) { - spa_t *spa; +spa_t *spa; - mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(name)) == NULL) { - mutex_exit(&spa_namespace_lock); - return (NULL); - } - spa->spa_inject_ref++; +mutex_enter(&spa_namespace_lock); +if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); + return (NULL); +} +spa->spa_inject_ref++; +mutex_exit(&spa_namespace_lock); - return (spa); +return (spa); } void spa_inject_delref(spa_t *spa) { - mutex_enter(&spa_namespace_lock); - spa->spa_inject_ref--; - mutex_exit(&spa_namespace_lock); +mutex_enter(&spa_namespace_lock); +spa->spa_inject_ref--; +mutex_exit(&spa_namespace_lock); } /* - * Add spares device information to the nvlist. - */ +* Add spares device information to the nvlist. +*/ static void spa_add_spares(spa_t *spa, nvlist_t *config) { - nvlist_t **spares; - uint_t i, nspares; - nvlist_t *nvroot; - uint64_t guid; - vdev_stat_t *vs; - uint_t vsc; - uint64_t pool; - - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - - if (spa->spa_spares.sav_count == 0) - return; - - VERIFY(nvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, +nvlist_t **spares; +uint_t i, nspares; +nvlist_t *nvroot; +uint64_t guid; +vdev_stat_t *vs; +uint_t vsc; +uint64_t pool; + +ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + +if (spa->spa_spares.sav_count == 0) + return; + +VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); +VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); +if (nspares != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); - if (nspares != 0) { - VERIFY(nvlist_add_nvlist_array(nvroot, - ZPOOL_CONFIG_SPARES, spares, nspares) == 0); - 
VERIFY(nvlist_lookup_nvlist_array(nvroot, - ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); - /* - * Go through and find any spares which have since been - * repurposed as an active spare. If this is the case, update - * their status appropriately. - */ - for (i = 0; i < nspares; i++) { - VERIFY(nvlist_lookup_uint64(spares[i], - ZPOOL_CONFIG_GUID, &guid) == 0); - if (spa_spare_exists(guid, &pool, NULL) && - pool != 0ULL) { - VERIFY(nvlist_lookup_uint64_array( - spares[i], ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0); - vs->vs_state = VDEV_STATE_CANT_OPEN; - vs->vs_aux = VDEV_AUX_SPARED; - } + /* + * Go through and find any spares which have since been + * repurposed as an active spare. If this is the case, update + * their status appropriately. + */ + for (i = 0; i < nspares; i++) { + VERIFY(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + if (spa_spare_exists(guid, &pool, NULL) && + pool != 0ULL) { + VERIFY(nvlist_lookup_uint64_array( + spares[i], ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + vs->vs_state = VDEV_STATE_CANT_OPEN; + vs->vs_aux = VDEV_AUX_SPARED; } } } +} /* - * Add l2cache device information to the nvlist, including vdev stats. - */ +* Add l2cache device information to the nvlist, including vdev stats. 
+*/ static void spa_add_l2cache(spa_t *spa, nvlist_t *config) { - nvlist_t **l2cache; - uint_t i, j, nl2cache; - nvlist_t *nvroot; - uint64_t guid; - vdev_t *vd; - vdev_stat_t *vs; - uint_t vsc; - - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - - if (spa->spa_l2cache.sav_count == 0) - return; - - VERIFY(nvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, +nvlist_t **l2cache; +uint_t i, j, nl2cache; +nvlist_t *nvroot; +uint64_t guid; +vdev_t *vd; +vdev_stat_t *vs; +uint_t vsc; + +ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + +if (spa->spa_l2cache.sav_count == 0) + return; + +VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); +VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); +if (nl2cache != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - if (nl2cache != 0) { - VERIFY(nvlist_add_nvlist_array(nvroot, - ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); - VERIFY(nvlist_lookup_nvlist_array(nvroot, - ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - /* - * Update level 2 cache device stats. - */ - - for (i = 0; i < nl2cache; i++) { - VERIFY(nvlist_lookup_uint64(l2cache[i], - ZPOOL_CONFIG_GUID, &guid) == 0); + /* + * Update level 2 cache device stats. 
+ */ - vd = NULL; - for (j = 0; j < spa->spa_l2cache.sav_count; j++) { - if (guid == - spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { - vd = spa->spa_l2cache.sav_vdevs[j]; - break; - } + for (i = 0; i < nl2cache; i++) { + VERIFY(nvlist_lookup_uint64(l2cache[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + + vd = NULL; + for (j = 0; j < spa->spa_l2cache.sav_count; j++) { + if (guid == + spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { + vd = spa->spa_l2cache.sav_vdevs[j]; + break; } - ASSERT(vd != NULL); + } + ASSERT(vd != NULL); - VERIFY(nvlist_lookup_uint64_array(l2cache[i], - ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) - == 0); - vdev_get_stats(vd, vs); - vdev_config_generate_stats(vd, l2cache[i]); + VERIFY(nvlist_lookup_uint64_array(l2cache[i], + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) + == 0); + vdev_get_stats(vd, vs); + vdev_config_generate_stats(vd, l2cache[i]); - } } } +} static void spa_feature_stats_from_disk(spa_t *spa, nvlist_t *features) { - zap_cursor_t zc; - zap_attribute_t za; +zap_cursor_t zc; +zap_attribute_t za; - if (spa->spa_feat_for_read_obj != 0) { - for (zap_cursor_init(&zc, spa->spa_meta_objset, - spa->spa_feat_for_read_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY0(nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); - } - zap_cursor_fini(&zc); +if (spa->spa_feat_for_read_obj != 0) { + for (zap_cursor_init(&zc, spa->spa_meta_objset, + spa->spa_feat_for_read_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + ASSERT(za.za_integer_length == sizeof (uint64_t) && + za.za_num_integers == 1); + VERIFY0(nvlist_add_uint64(features, za.za_name, + za.za_first_integer)); } + zap_cursor_fini(&zc); +} - if (spa->spa_feat_for_write_obj != 0) { - for (zap_cursor_init(&zc, spa->spa_meta_objset, - spa->spa_feat_for_write_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - 
ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY0(nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); - } - zap_cursor_fini(&zc); +if (spa->spa_feat_for_write_obj != 0) { + for (zap_cursor_init(&zc, spa->spa_meta_objset, + spa->spa_feat_for_write_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + ASSERT(za.za_integer_length == sizeof (uint64_t) && + za.za_num_integers == 1); + VERIFY0(nvlist_add_uint64(features, za.za_name, + za.za_first_integer)); } + zap_cursor_fini(&zc); +} } static void spa_feature_stats_from_cache(spa_t *spa, nvlist_t *features) { - int i; +int i; - for (i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t feature = spa_feature_table[i]; - uint64_t refcount; +for (i = 0; i < SPA_FEATURES; i++) { + zfeature_info_t feature = spa_feature_table[i]; + uint64_t refcount; - if (feature_get_refcount(spa, &feature, &refcount) != 0) - continue; + if (feature_get_refcount(spa, &feature, &refcount) != 0) + continue; - VERIFY0(nvlist_add_uint64(features, feature.fi_guid, refcount)); - } + VERIFY0(nvlist_add_uint64(features, feature.fi_guid, refcount)); +} } /* - * Store a list of pool features and their reference counts in the - * config. - * - * The first time this is called on a spa, allocate a new nvlist, fetch - * the pool features and reference counts from disk, then save the list - * in the spa. In subsequent calls on the same spa use the saved nvlist - * and refresh its values from the cached reference counts. This - * ensures we don't block here on I/O on a suspended pool so 'zpool - * clear' can resume the pool. - */ +* Store a list of pool features and their reference counts in the +* config. +* +* The first time this is called on a spa, allocate a new nvlist, fetch +* the pool features and reference counts from disk, then save the list +* in the spa. In subsequent calls on the same spa use the saved nvlist +* and refresh its values from the cached reference counts. 
This +* ensures we don't block here on I/O on a suspended pool so 'zpool +* clear' can resume the pool. +*/ static void spa_add_feature_stats(spa_t *spa, nvlist_t *config) { - nvlist_t *features; +nvlist_t *features; - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); +ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - mutex_enter(&spa->spa_feat_stats_lock); - features = spa->spa_feat_stats; +mutex_enter(&spa->spa_feat_stats_lock); +features = spa->spa_feat_stats; - if (features != NULL) { - spa_feature_stats_from_cache(spa, features); - } else { - VERIFY0(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP)); - spa->spa_feat_stats = features; - spa_feature_stats_from_disk(spa, features); - } +if (features != NULL) { + spa_feature_stats_from_cache(spa, features); +} else { + VERIFY0(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP)); + spa->spa_feat_stats = features; + spa_feature_stats_from_disk(spa, features); +} - VERIFY0(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, - features)); +VERIFY0(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, + features)); - mutex_exit(&spa->spa_feat_stats_lock); +mutex_exit(&spa->spa_feat_stats_lock); } int spa_get_stats(const char *name, nvlist_t **config, - char *altroot, size_t buflen) +char *altroot, size_t buflen) { - int error; - spa_t *spa; +int error; +spa_t *spa; - *config = NULL; - error = spa_open_common(name, &spa, FTAG, NULL, config); +*config = NULL; +error = spa_open_common(name, &spa, FTAG, NULL, config); - if (spa != NULL) { - /* - * This still leaves a window of inconsistency where the spares - * or l2cache devices could change and the config would be - * self-inconsistent. - */ - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); +if (spa != NULL) { + /* + * This still leaves a window of inconsistency where the spares + * or l2cache devices could change and the config would be + * self-inconsistent. 
+ */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - if (*config != NULL) { - uint64_t loadtimes[2]; + if (*config != NULL) { + uint64_t loadtimes[2]; - loadtimes[0] = spa->spa_loaded_ts.tv_sec; - loadtimes[1] = spa->spa_loaded_ts.tv_nsec; - VERIFY(nvlist_add_uint64_array(*config, - ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); + loadtimes[0] = spa->spa_loaded_ts.tv_sec; + loadtimes[1] = spa->spa_loaded_ts.tv_nsec; + VERIFY(nvlist_add_uint64_array(*config, + ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); - VERIFY(nvlist_add_uint64(*config, - ZPOOL_CONFIG_ERRCOUNT, - spa_get_errlog_size(spa)) == 0); - - if (spa_suspended(spa)) { - VERIFY(nvlist_add_uint64(*config, - ZPOOL_CONFIG_SUSPENDED, - spa->spa_failmode) == 0); - VERIFY(nvlist_add_uint64(*config, - ZPOOL_CONFIG_SUSPENDED_REASON, - spa->spa_suspended) == 0); - } + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_ERRCOUNT, + spa_get_errlog_size(spa)) == 0); - spa_add_spares(spa, *config); - spa_add_l2cache(spa, *config); - spa_add_feature_stats(spa, *config); + if (spa_suspended(spa)) { + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED, + spa->spa_failmode) == 0); + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED_REASON, + spa->spa_suspended) == 0); } + + spa_add_spares(spa, *config); + spa_add_l2cache(spa, *config); + spa_add_feature_stats(spa, *config); } +} - /* - * We want to get the alternate root even for faulted pools, so we cheat - * and call spa_lookup() directly. - */ - if (altroot) { - if (spa == NULL) { - mutex_enter(&spa_namespace_lock); - spa = spa_lookup(name); - if (spa) - spa_altroot(spa, altroot, buflen); - else - altroot[0] = '\0'; - spa = NULL; - mutex_exit(&spa_namespace_lock); - } else { +/* + * We want to get the alternate root even for faulted pools, so we cheat + * and call spa_lookup() directly. 
+ */ +if (altroot) { + if (spa == NULL) { + mutex_enter(&spa_namespace_lock); + spa = spa_lookup(name); + if (spa) spa_altroot(spa, altroot, buflen); - } + else + altroot[0] = '\0'; + spa = NULL; + mutex_exit(&spa_namespace_lock); + } else { + spa_altroot(spa, altroot, buflen); } +} - if (spa != NULL) { - spa_config_exit(spa, SCL_CONFIG, FTAG); - spa_close(spa, FTAG); - } +if (spa != NULL) { + spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_close(spa, FTAG); +} - return (error); +return (error); } /* - * Validate that the auxiliary device array is well formed. We must have an - * array of nvlists, each which describes a valid leaf vdev. If this is an - * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be - * specified, as long as they are well-formed. - */ +* Validate that the auxiliary device array is well formed. We must have an +* array of nvlists, each which describes a valid leaf vdev. If this is an +* import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be +* specified, as long as they are well-formed. +*/ static int spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, - spa_aux_vdev_t *sav, const char *config, uint64_t version, - vdev_labeltype_t label) +spa_aux_vdev_t *sav, const char *config, uint64_t version, +vdev_labeltype_t label) { - nvlist_t **dev; - uint_t i, ndev; - vdev_t *vd; - int error; +nvlist_t **dev; +uint_t i, ndev; +vdev_t *vd; +int error; - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - /* - * It's acceptable to have no devs specified. - */ - if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) - return (0); +/* + * It's acceptable to have no devs specified. 
+ */ +if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) + return (0); - if (ndev == 0) - return (SET_ERROR(EINVAL)); +if (ndev == 0) + return (SET_ERROR(EINVAL)); - /* - * Make sure the pool is formatted with a version that supports this - * device type. - */ - if (spa_version(spa) < version) - return (SET_ERROR(ENOTSUP)); +/* + * Make sure the pool is formatted with a version that supports this + * device type. + */ +if (spa_version(spa) < version) + return (SET_ERROR(ENOTSUP)); - /* - * Set the pending device list so we correctly handle device in-use - * checking. - */ - sav->sav_pending = dev; - sav->sav_npending = ndev; +/* + * Set the pending device list so we correctly handle device in-use + * checking. + */ +sav->sav_pending = dev; +sav->sav_npending = ndev; - for (i = 0; i < ndev; i++) { - if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, - mode)) != 0) - goto out; +for (i = 0; i < ndev; i++) { + if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, + mode)) != 0) + goto out; - if (!vd->vdev_ops->vdev_op_leaf) { - vdev_free(vd); - error = SET_ERROR(EINVAL); - goto out; - } + if (!vd->vdev_ops->vdev_op_leaf) { + vdev_free(vd); + error = SET_ERROR(EINVAL); + goto out; + } - vd->vdev_top = vd; + vd->vdev_top = vd; - if ((error = vdev_open(vd)) == 0 && - (error = vdev_label_init(vd, crtxg, label)) == 0) { - VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - } + if ((error = vdev_open(vd)) == 0 && + (error = vdev_label_init(vd, crtxg, label)) == 0) { + VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + } - vdev_free(vd); + vdev_free(vd); - if (error && - (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) - goto out; - else - error = 0; - } + if (error && + (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) + goto out; + else + error = 0; +} out: - sav->sav_pending = NULL; - sav->sav_npending = 0; - return (error); +sav->sav_pending = NULL; +sav->sav_npending = 0; 
+return (error); } static int spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) { - int error; +int error; - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, - &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, - VDEV_LABEL_SPARE)) != 0) { - return (error); - } +if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, + &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, + VDEV_LABEL_SPARE)) != 0) { + return (error); +} - return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, - &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, - VDEV_LABEL_L2CACHE)); +return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, + &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, + VDEV_LABEL_L2CACHE)); } static void spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, - const char *config) +const char *config) { - int i; +int i; - if (sav->sav_config != NULL) { - nvlist_t **olddevs; - uint_t oldndevs; - nvlist_t **newdevs; +if (sav->sav_config != NULL) { + nvlist_t **olddevs; + uint_t oldndevs; + nvlist_t **newdevs; - /* - * Generate new dev list by concatenating with the - * current dev list. - */ - VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, - &olddevs, &oldndevs) == 0); + /* + * Generate new dev list by concatenating with the + * current dev list. 
+ */ + VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, + &olddevs, &oldndevs) == 0); - newdevs = kmem_alloc(sizeof (void *) * - (ndevs + oldndevs), KM_SLEEP); - for (i = 0; i < oldndevs; i++) - VERIFY(nvlist_dup(olddevs[i], &newdevs[i], - KM_SLEEP) == 0); - for (i = 0; i < ndevs; i++) - VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], - KM_SLEEP) == 0); + newdevs = kmem_alloc(sizeof (void *) * + (ndevs + oldndevs), KM_SLEEP); + for (i = 0; i < oldndevs; i++) + VERIFY(nvlist_dup(olddevs[i], &newdevs[i], + KM_SLEEP) == 0); + for (i = 0; i < ndevs; i++) + VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], + KM_SLEEP) == 0); - VERIFY(nvlist_remove(sav->sav_config, config, - DATA_TYPE_NVLIST_ARRAY) == 0); + VERIFY(nvlist_remove(sav->sav_config, config, + DATA_TYPE_NVLIST_ARRAY) == 0); - VERIFY(nvlist_add_nvlist_array(sav->sav_config, - config, newdevs, ndevs + oldndevs) == 0); - for (i = 0; i < oldndevs + ndevs; i++) - nvlist_free(newdevs[i]); - kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); - } else { - /* - * Generate a new dev list. - */ - VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, - KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, - devs, ndevs) == 0); - } + VERIFY(nvlist_add_nvlist_array(sav->sav_config, + config, newdevs, ndevs + oldndevs) == 0); + for (i = 0; i < oldndevs + ndevs; i++) + nvlist_free(newdevs[i]); + kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); +} else { + /* + * Generate a new dev list. 
+ */ + VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, + devs, ndevs) == 0); +} } /* - * Stop and drop level 2 ARC devices - */ +* Stop and drop level 2 ARC devices +*/ void spa_l2cache_drop(spa_t *spa) { - vdev_t *vd; - int i; - spa_aux_vdev_t *sav = &spa->spa_l2cache; +vdev_t *vd; +int i; +spa_aux_vdev_t *sav = &spa->spa_l2cache; - for (i = 0; i < sav->sav_count; i++) { - uint64_t pool; +for (i = 0; i < sav->sav_count; i++) { + uint64_t pool; - vd = sav->sav_vdevs[i]; - ASSERT(vd != NULL); + vd = sav->sav_vdevs[i]; + ASSERT(vd != NULL); - if (spa_l2cache_exists(vd->vdev_guid, &pool) && - pool != 0ULL && l2arc_vdev_present(vd)) - l2arc_remove_vdev(vd); - } + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); +} } /* - * Verify encryption parameters for spa creation. If we are encrypting, we must - * have the encryption feature flag enabled. - */ +* Verify encryption parameters for spa creation. If we are encrypting, we must +* have the encryption feature flag enabled. 
+*/ static int spa_create_check_encryption_params(dsl_crypto_params_t *dcp, - boolean_t has_encryption) +boolean_t has_encryption) { - if (dcp->cp_crypt != ZIO_CRYPT_OFF && - dcp->cp_crypt != ZIO_CRYPT_INHERIT && - !has_encryption) - return (SET_ERROR(ENOTSUP)); +if (dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT && + !has_encryption) + return (SET_ERROR(ENOTSUP)); - return (dmu_objset_create_crypt_check(NULL, dcp, NULL)); +return (dmu_objset_create_crypt_check(NULL, dcp, NULL)); } /* - * Pool Creation - */ +* Pool Creation +*/ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - nvlist_t *zplprops, dsl_crypto_params_t *dcp) -{ - spa_t *spa; - char *altroot = NULL; - vdev_t *rvd; - dsl_pool_t *dp; - dmu_tx_t *tx; - int error = 0; - uint64_t txg = TXG_INITIAL; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - uint64_t version, obj; - boolean_t has_features; - boolean_t has_encryption; - boolean_t has_allocclass; - spa_feature_t feat; - char *feat_name; - char *poolname; - nvlist_t *nvl; - - if (props == NULL || - nvlist_lookup_string(props, "tname", &poolname) != 0) - poolname = (char *)pool; +nvlist_t *zplprops, dsl_crypto_params_t *dcp) +{ +spa_t *spa; +char *altroot = NULL; +vdev_t *rvd; +dsl_pool_t *dp; +dmu_tx_t *tx; +int error = 0; +uint64_t txg = TXG_INITIAL; +nvlist_t **spares, **l2cache; +uint_t nspares, nl2cache; +uint64_t version, obj; +boolean_t has_features; +boolean_t has_encryption; +boolean_t has_allocclass; +spa_feature_t feat; +char *feat_name; +char *poolname; +nvlist_t *nvl; + +if (props == NULL || + nvlist_lookup_string(props, "tname", &poolname) != 0) + poolname = (char *)pool; - /* - * If this pool already exists, return failure. - */ - mutex_enter(&spa_namespace_lock); - if (spa_lookup(poolname) != NULL) { - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EEXIST)); - } +/* + * If this pool already exists, return failure. 
+ */ +mutex_enter(&spa_namespace_lock); +if (spa_lookup(poolname) != NULL) { + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EEXIST)); +} - /* - * Allocate a new spa_t structure. - */ - nvl = fnvlist_alloc(); - fnvlist_add_string(nvl, ZPOOL_CONFIG_POOL_NAME, pool); - (void) nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); - spa = spa_add(poolname, nvl, altroot); - fnvlist_free(nvl); - spa_activate(spa, spa_mode_global); +/* + * Allocate a new spa_t structure. + */ +nvl = fnvlist_alloc(); +fnvlist_add_string(nvl, ZPOOL_CONFIG_POOL_NAME, pool); +(void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); +spa = spa_add(poolname, nvl, altroot); +fnvlist_free(nvl); +spa_activate(spa, spa_mode_global); + +if (props && (error = spa_prop_validate(spa, props))) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); +} - if (props && (error = spa_prop_validate(spa, props))) { - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } +/* + * Temporary pool names should never be written to disk. + */ +if (poolname != pool) + spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; - /* - * Temporary pool names should never be written to disk. 
- */ - if (poolname != pool) - spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; - - has_features = B_FALSE; - has_encryption = B_FALSE; - has_allocclass = B_FALSE; - for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); - elem != NULL; elem = nvlist_next_nvpair(props, elem)) { - if (zpool_prop_feature(nvpair_name(elem))) { - has_features = B_TRUE; - - feat_name = strchr(nvpair_name(elem), '@') + 1; - VERIFY0(zfeature_lookup_name(feat_name, &feat)); - if (feat == SPA_FEATURE_ENCRYPTION) - has_encryption = B_TRUE; - if (feat == SPA_FEATURE_ALLOCATION_CLASSES) - has_allocclass = B_TRUE; - } - } +has_features = B_FALSE; +has_encryption = B_FALSE; +has_allocclass = B_FALSE; +for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); + elem != NULL; elem = nvlist_next_nvpair(props, elem)) { + if (zpool_prop_feature(nvpair_name(elem))) { + has_features = B_TRUE; - /* verify encryption params, if they were provided */ - if (dcp != NULL) { - error = spa_create_check_encryption_params(dcp, has_encryption); - if (error != 0) { - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } + feat_name = strchr(nvpair_name(elem), '@') + 1; + VERIFY0(zfeature_lookup_name(feat_name, &feat)); + if (feat == SPA_FEATURE_ENCRYPTION) + has_encryption = B_TRUE; + if (feat == SPA_FEATURE_ALLOCATION_CLASSES) + has_allocclass = B_TRUE; } - if (!has_allocclass && zfs_special_devs(nvroot)) { +} + +/* verify encryption params, if they were provided */ +if (dcp != NULL) { + error = spa_create_check_encryption_params(dcp, has_encryption); + if (error != 0) { spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); - return (ENOTSUP); + return (error); } +} +if (!has_allocclass && zfs_special_devs(nvroot)) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (ENOTSUP); +} - if (has_features || nvlist_lookup_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { - version = SPA_VERSION; - } - 
ASSERT(SPA_VERSION_IS_SUPPORTED(version)); +if (has_features || nvlist_lookup_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { + version = SPA_VERSION; +} +ASSERT(SPA_VERSION_IS_SUPPORTED(version)); - spa->spa_first_txg = txg; - spa->spa_uberblock.ub_txg = txg - 1; - spa->spa_uberblock.ub_version = version; - spa->spa_ubsync = spa->spa_uberblock; - spa->spa_load_state = SPA_LOAD_CREATE; - spa->spa_removing_phys.sr_state = DSS_NONE; - spa->spa_removing_phys.sr_removing_vdev = -1; - spa->spa_removing_phys.sr_prev_indirect_vdev = -1; - spa->spa_indirect_vdevs_loaded = B_TRUE; +spa->spa_first_txg = txg; +spa->spa_uberblock.ub_txg = txg - 1; +spa->spa_uberblock.ub_version = version; +spa->spa_ubsync = spa->spa_uberblock; +spa->spa_load_state = SPA_LOAD_CREATE; +spa->spa_removing_phys.sr_state = DSS_NONE; +spa->spa_removing_phys.sr_removing_vdev = -1; +spa->spa_removing_phys.sr_prev_indirect_vdev = -1; +spa->spa_indirect_vdevs_loaded = B_TRUE; - /* - * Create "The Godfather" zio to hold all async IOs - */ - spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), - KM_SLEEP); - for (int i = 0; i < max_ncpus; i++) { - spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); - } +/* + * Create "The Godfather" zio to hold all async IOs + */ +spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); +for (int i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); +} - /* - * Create the root vdev. - */ - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +/* + * Create the root vdev. 
+ */ +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); +error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); - ASSERT(error != 0 || rvd != NULL); - ASSERT(error != 0 || spa->spa_root_vdev == rvd); +ASSERT(error != 0 || rvd != NULL); +ASSERT(error != 0 || spa->spa_root_vdev == rvd); - if (error == 0 && !zfs_allocatable_devs(nvroot)) - error = SET_ERROR(EINVAL); +if (error == 0 && !zfs_allocatable_devs(nvroot)) + error = SET_ERROR(EINVAL); - if (error == 0 && - (error = vdev_create(rvd, txg, B_FALSE)) == 0 && - (error = spa_validate_aux(spa, nvroot, txg, - VDEV_ALLOC_ADD)) == 0) { - /* - * instantiate the metaslab groups (this will dirty the vdevs) - * we can no longer error exit past this point - */ - for (int c = 0; error == 0 && c < rvd->vdev_children; c++) { - vdev_t *vd = rvd->vdev_child[c]; +if (error == 0 && + (error = vdev_create(rvd, txg, B_FALSE)) == 0 && + (error = spa_validate_aux(spa, nvroot, txg, + VDEV_ALLOC_ADD)) == 0) { + /* + * instantiate the metaslab groups (this will dirty the vdevs) + * we can no longer error exit past this point + */ + for (int c = 0; error == 0 && c < rvd->vdev_children; c++) { + vdev_t *vd = rvd->vdev_child[c]; - vdev_metaslab_set_size(vd); - vdev_expand(vd, txg); - } + vdev_metaslab_set_size(vd); + vdev_expand(vd, txg); } +} - spa_config_exit(spa, SCL_ALL, FTAG); +spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0) { - spa_unload(spa); - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } +if (error != 0) { + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); +} - /* - * Get the list of spares, if specified. 
- */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, - KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, spares, nspares) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_spares(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_spares.sav_sync = B_TRUE; - } +/* + * Get the list of spares, if specified. + */ +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_spares.sav_sync = B_TRUE; +} - /* - * Get the list of level 2 cache devices, if specified. - */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) { - VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_l2cache(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_l2cache.sav_sync = B_TRUE; - } +/* + * Get the list of level 2 cache devices, if specified. 
+ */ +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_l2cache.sav_sync = B_TRUE; +} - spa->spa_is_initializing = B_TRUE; - spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); - spa->spa_is_initializing = B_FALSE; +spa->spa_is_initializing = B_TRUE; +spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); +spa->spa_is_initializing = B_FALSE; - /* - * Create DDTs (dedup tables). - */ - ddt_create(spa); +/* + * Create DDTs (dedup tables). + */ +ddt_create(spa); - spa_update_dspace(spa); +spa_update_dspace(spa); - tx = dmu_tx_create_assigned(dp, txg); +tx = dmu_tx_create_assigned(dp, txg); - /* - * Create the pool's history object. - */ - if (version >= SPA_VERSION_ZPOOL_HISTORY && !spa->spa_history) - spa_history_create_obj(spa, tx); +/* + * Create the pool's history object. + */ +if (version >= SPA_VERSION_ZPOOL_HISTORY && !spa->spa_history) + spa_history_create_obj(spa, tx); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); - spa_history_log_version(spa, "create", tx); +spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); +spa_history_log_version(spa, "create", tx); - /* - * Create the pool config object. - */ - spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, - DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, - DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); +/* + * Create the pool config object. 
+ */ +spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, + DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, + DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); - if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, - sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { - cmn_err(CE_PANIC, "failed to add pool config"); - } +if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, + sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { + cmn_err(CE_PANIC, "failed to add pool config"); +} - if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, - sizeof (uint64_t), 1, &version, tx) != 0) { - cmn_err(CE_PANIC, "failed to add pool version"); - } - - /* Newly created pools with the right version are always deflated. */ - if (version >= SPA_VERSION_RAIDZ_DEFLATE) { - spa->spa_deflate = TRUE; - if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, - sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { - cmn_err(CE_PANIC, "failed to add deflate"); - } - } +if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, + sizeof (uint64_t), 1, &version, tx) != 0) { + cmn_err(CE_PANIC, "failed to add pool version"); +} - /* - * Create the deferred-free bpobj. Turn off compression - * because sync-to-convergence takes longer if the blocksize - * keeps changing. - */ - obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); - dmu_object_set_compress(spa->spa_meta_objset, obj, - ZIO_COMPRESS_OFF, tx); +/* Newly created pools with the right version are always deflated. 
*/ +if (version >= SPA_VERSION_RAIDZ_DEFLATE) { + spa->spa_deflate = TRUE; if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, - sizeof (uint64_t), 1, &obj, tx) != 0) { - cmn_err(CE_PANIC, "failed to add bpobj"); + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { + cmn_err(CE_PANIC, "failed to add deflate"); } - VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, - spa->spa_meta_objset, obj)); +} - /* - * Generate some random noise for salted checksums to operate on. - */ - (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, - sizeof (spa->spa_cksum_salt.zcs_bytes)); +/* + * Create the deferred-free bpobj. Turn off compression + * because sync-to-convergence takes longer if the blocksize + * keeps changing. + */ +obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); +dmu_object_set_compress(spa->spa_meta_objset, obj, + ZIO_COMPRESS_OFF, tx); +if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, + sizeof (uint64_t), 1, &obj, tx) != 0) { + cmn_err(CE_PANIC, "failed to add bpobj"); +} +VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, + spa->spa_meta_objset, obj)); - /* - * Set pool properties. - */ - spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); - spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); - spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); - spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); - spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); - spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); +/* + * Generate some random noise for salted checksums to operate on. + */ +(void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, + sizeof (spa->spa_cksum_salt.zcs_bytes)); - if (props != NULL) { - spa_configfile_set(spa, props, B_FALSE); - spa_sync_props(props, tx); - } +/* + * Set pool properties. 
+ */ +spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); +spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); +spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); +spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); +spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); +spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); - dmu_tx_commit(tx); +if (props != NULL) { + spa_configfile_set(spa, props, B_FALSE); + spa_sync_props(props, tx); +} - spa->spa_sync_on = B_TRUE; - txg_sync_start(dp); - mmp_thread_start(spa); - txg_wait_synced(dp, txg); +dmu_tx_commit(tx); - spa_spawn_aux_threads(spa); +spa->spa_sync_on = B_TRUE; +txg_sync_start(dp); +mmp_thread_start(spa); +txg_wait_synced(dp, txg); - spa_write_cachefile(spa, B_FALSE, B_TRUE); +spa_spawn_aux_threads(spa); - /* - * Don't count references from objsets that are already closed - * and are making their way through the eviction process. - */ - spa_evicting_os_wait(spa); - spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); - spa->spa_load_state = SPA_LOAD_NONE; +spa_write_cachefile(spa, B_FALSE, B_TRUE); - mutex_exit(&spa_namespace_lock); +/* + * Don't count references from objsets that are already closed + * and are making their way through the eviction process. + */ +spa_evicting_os_wait(spa); +spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); +spa->spa_load_state = SPA_LOAD_NONE; - return (0); +mutex_exit(&spa_namespace_lock); + +return (0); } /* - * Import a non-root pool into the system. - */ +* Import a non-root pool into the system. 
+*/ int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) { - spa_t *spa; - char *altroot = NULL; - spa_load_state_t state = SPA_LOAD_IMPORT; - zpool_load_policy_t policy; - uint64_t mode = spa_mode_global; - uint64_t readonly = B_FALSE; - int error; - nvlist_t *nvroot; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - - /* - * If a pool with this name exists, return failure. - */ - mutex_enter(&spa_namespace_lock); - if (spa_lookup(pool) != NULL) { - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EEXIST)); - } +spa_t *spa; +char *altroot = NULL; +spa_load_state_t state = SPA_LOAD_IMPORT; +zpool_load_policy_t policy; +uint64_t mode = spa_mode_global; +uint64_t readonly = B_FALSE; +int error; +nvlist_t *nvroot; +nvlist_t **spares, **l2cache; +uint_t nspares, nl2cache; - /* - * Create and initialize the spa structure. - */ - (void) nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); - (void) nvlist_lookup_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); - if (readonly) - mode = FREAD; - spa = spa_add(pool, config, altroot); - spa->spa_import_flags = flags; +/* + * If a pool with this name exists, return failure. + */ +mutex_enter(&spa_namespace_lock); +if (spa_lookup(pool) != NULL) { + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EEXIST)); +} - /* - * Verbatim import - Take a pool and insert it into the namespace - * as if it had been loaded at boot. - */ - if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { - if (props != NULL) - spa_configfile_set(spa, props, B_FALSE); +/* + * Create and initialize the spa structure. 
+ */ +(void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); +(void) nvlist_lookup_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); +if (readonly) + mode = FREAD; +spa = spa_add(pool, config, altroot); +spa->spa_import_flags = flags; - spa_write_cachefile(spa, B_FALSE, B_TRUE); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); - zfs_dbgmsg("spa_import: verbatim import of %s", pool); - mutex_exit(&spa_namespace_lock); - return (0); - } +/* + * Verbatim import - Take a pool and insert it into the namespace + * as if it had been loaded at boot. + */ +if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { + if (props != NULL) + spa_configfile_set(spa, props, B_FALSE); - spa_activate(spa, mode); + spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); + zfs_dbgmsg("spa_import: verbatim import of %s", pool); + mutex_exit(&spa_namespace_lock); + return (0); +} - /* - * Don't start async tasks until we know everything is healthy. - */ - spa_async_suspend(spa); +spa_activate(spa, mode); - zpool_get_load_policy(config, &policy); - if (policy.zlp_rewind & ZPOOL_DO_REWIND) - state = SPA_LOAD_RECOVER; +/* + * Don't start async tasks until we know everything is healthy. + */ +spa_async_suspend(spa); - spa->spa_config_source = SPA_CONFIG_SRC_TRYIMPORT; +zpool_get_load_policy(config, &policy); +if (policy.zlp_rewind & ZPOOL_DO_REWIND) + state = SPA_LOAD_RECOVER; - if (state != SPA_LOAD_RECOVER) { - spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; - zfs_dbgmsg("spa_import: importing %s", pool); - } else { - zfs_dbgmsg("spa_import: importing %s, max_txg=%lld " - "(RECOVERY MODE)", pool, (longlong_t)policy.zlp_txg); - } - error = spa_load_best(spa, state, policy.zlp_txg, policy.zlp_rewind); +spa->spa_config_source = SPA_CONFIG_SRC_TRYIMPORT; - /* - * Propagate anything learned while loading the pool and pass it - * back to caller (i.e. rewind info, missing devices, etc). 
- */ - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); +if (state != SPA_LOAD_RECOVER) { + spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; + zfs_dbgmsg("spa_import: importing %s", pool); +} else { + zfs_dbgmsg("spa_import: importing %s, max_txg=%lld " + "(RECOVERY MODE)", pool, (longlong_t)policy.zlp_txg); +} +error = spa_load_best(spa, state, policy.zlp_txg, policy.zlp_rewind); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - /* - * Toss any existing sparelist, as it doesn't have any validity - * anymore, and conflicts with spa_has_spare(). - */ - if (spa->spa_spares.sav_config) { - nvlist_free(spa->spa_spares.sav_config); - spa->spa_spares.sav_config = NULL; - spa_load_spares(spa); - } - if (spa->spa_l2cache.sav_config) { - nvlist_free(spa->spa_l2cache.sav_config); - spa->spa_l2cache.sav_config = NULL; - spa_load_l2cache(spa); - } +/* + * Propagate anything learned while loading the pool and pass it + * back to caller (i.e. rewind info, missing devices, etc). + */ +VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); - VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - spa_config_exit(spa, SCL_ALL, FTAG); +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +/* + * Toss any existing sparelist, as it doesn't have any validity + * anymore, and conflicts with spa_has_spare(). 
+ */ +if (spa->spa_spares.sav_config) { + nvlist_free(spa->spa_spares.sav_config); + spa->spa_spares.sav_config = NULL; + spa_load_spares(spa); +} +if (spa->spa_l2cache.sav_config) { + nvlist_free(spa->spa_l2cache.sav_config); + spa->spa_l2cache.sav_config = NULL; + spa_load_l2cache(spa); +} - if (props != NULL) - spa_configfile_set(spa, props, B_FALSE); +VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); +spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0 || (props && spa_writeable(spa) && - (error = spa_prop_set(spa, props)))) { - spa_unload(spa); - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } +if (props != NULL) + spa_configfile_set(spa, props, B_FALSE); - spa_async_resume(spa); +if (error != 0 || (props && spa_writeable(spa) && + (error = spa_prop_set(spa, props)))) { + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); +} - /* - * Override any spares and level 2 cache devices as specified by - * the user, as these may have correct device names/devids, etc. 
- */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - if (spa->spa_spares.sav_config) - VERIFY(nvlist_remove(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); - else - VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, spares, nspares) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_spares(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_spares.sav_sync = B_TRUE; - } - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) { - if (spa->spa_l2cache.sav_config) - VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); - else - VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_l2cache(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_l2cache.sav_sync = B_TRUE; - } +spa_async_resume(spa); - /* - * Check for any removed devices. - */ - if (spa->spa_autoreplace) { - spa_aux_check_removed(&spa->spa_spares); - spa_aux_check_removed(&spa->spa_l2cache); - } +/* + * Override any spares and level 2 cache devices as specified by + * the user, as these may have correct device names/devids, etc. 
+ */ +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + if (spa->spa_spares.sav_config) + VERIFY(nvlist_remove(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_spares.sav_sync = B_TRUE; +} +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + if (spa->spa_l2cache.sav_config) + VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_l2cache.sav_sync = B_TRUE; +} - if (spa_writeable(spa)) { - /* - * Update the config cache to include the newly-imported pool. - */ - spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); - } +/* + * Check for any removed devices. + */ +if (spa->spa_autoreplace) { + spa_aux_check_removed(&spa->spa_spares); + spa_aux_check_removed(&spa->spa_l2cache); +} +if (spa_writeable(spa)) { /* - * It's possible that the pool was expanded while it was exported. - * We kick off an async task to handle this for us. + * Update the config cache to include the newly-imported pool. */ - spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); + spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); +} - spa_history_log_version(spa, "import", NULL); +/* + * It's possible that the pool was expanded while it was exported. + * We kick off an async task to handle this for us. 
+ */ +spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); +spa_history_log_version(spa, "import", NULL); - zvol_create_minors(spa, pool, B_TRUE); +spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); - mutex_exit(&spa_namespace_lock); +zvol_create_minors(spa, pool, B_TRUE); - return (0); +mutex_exit(&spa_namespace_lock); + +return (0); } nvlist_t * spa_tryimport(nvlist_t *tryconfig) { - nvlist_t *config = NULL; - char *poolname, *cachefile; - spa_t *spa; - uint64_t state; - int error; - zpool_load_policy_t policy; +nvlist_t *config = NULL; +char *poolname, *cachefile; +spa_t *spa; +uint64_t state; +int error; +zpool_load_policy_t policy; - if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) - return (NULL); +if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) + return (NULL); - if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) - return (NULL); +if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) + return (NULL); - /* - * Create and initialize the spa structure. - */ - mutex_enter(&spa_namespace_lock); - spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); - spa_activate(spa, FREAD); +/* + * Create and initialize the spa structure. + */ +mutex_enter(&spa_namespace_lock); +spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); +spa_activate(spa, FREAD); - /* - * Rewind pool if a max txg was provided. - */ - zpool_get_load_policy(spa->spa_config, &policy); - if (policy.zlp_txg != UINT64_MAX) { - spa->spa_load_max_txg = policy.zlp_txg; - spa->spa_extreme_rewind = B_TRUE; - zfs_dbgmsg("spa_tryimport: importing %s, max_txg=%lld", - poolname, (longlong_t)policy.zlp_txg); - } else { - zfs_dbgmsg("spa_tryimport: importing %s", poolname); - } +/* + * Rewind pool if a max txg was provided. 
+ */ +zpool_get_load_policy(spa->spa_config, &policy); +if (policy.zlp_txg != UINT64_MAX) { + spa->spa_load_max_txg = policy.zlp_txg; + spa->spa_extreme_rewind = B_TRUE; + zfs_dbgmsg("spa_tryimport: importing %s, max_txg=%lld", + poolname, (longlong_t)policy.zlp_txg); +} else { + zfs_dbgmsg("spa_tryimport: importing %s", poolname); +} - if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_CACHEFILE, &cachefile) - == 0) { - zfs_dbgmsg("spa_tryimport: using cachefile '%s'", cachefile); - spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; - } else { - spa->spa_config_source = SPA_CONFIG_SRC_SCAN; - } +if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_CACHEFILE, &cachefile) + == 0) { + zfs_dbgmsg("spa_tryimport: using cachefile '%s'", cachefile); + spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; +} else { + spa->spa_config_source = SPA_CONFIG_SRC_SCAN; +} - error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); +error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); + +/* + * If 'tryconfig' was at least parsable, return the current config. + */ +if (spa->spa_root_vdev != NULL) { + config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); + VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, + poolname) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, + state) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, + spa->spa_uberblock.ub_timestamp) == 0); + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, + spa->spa_errata) == 0); /* - * If 'tryconfig' was at least parsable, return the current config. + * If the bootfs property exists on this pool then we + * copy it out so that external consumers can tell which + * pools are bootable. 
*/ - if (spa->spa_root_vdev != NULL) { - config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); - VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, - poolname) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, - state) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, - spa->spa_uberblock.ub_timestamp) == 0); - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, - spa->spa_errata) == 0); + if ((!error || error == EEXIST) && spa->spa_bootfs) { + char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* - * If the bootfs property exists on this pool then we - * copy it out so that external consumers can tell which - * pools are bootable. + * We have to play games with the name since the + * pool was opened as TRYIMPORT_NAME. */ - if ((!error || error == EEXIST) && spa->spa_bootfs) { - char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + if (dsl_dsobj_to_dsname(spa_name(spa), + spa->spa_bootfs, tmpname) == 0) { + char *cp; + char *dsname; - /* - * We have to play games with the name since the - * pool was opened as TRYIMPORT_NAME. 
- */ - if (dsl_dsobj_to_dsname(spa_name(spa), - spa->spa_bootfs, tmpname) == 0) { - char *cp; - char *dsname; - - dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - cp = strchr(tmpname, '/'); - if (cp == NULL) { - (void) strlcpy(dsname, tmpname, - MAXPATHLEN); - } else { - (void) snprintf(dsname, MAXPATHLEN, - "%s/%s", poolname, ++cp); - } - VERIFY(nvlist_add_string(config, - ZPOOL_CONFIG_BOOTFS, dsname) == 0); - kmem_free(dsname, MAXPATHLEN); + dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + cp = strchr(tmpname, '/'); + if (cp == NULL) { + (void) strlcpy(dsname, tmpname, + MAXPATHLEN); + } else { + (void) snprintf(dsname, MAXPATHLEN, + "%s/%s", poolname, ++cp); } - kmem_free(tmpname, MAXPATHLEN); + VERIFY(nvlist_add_string(config, + ZPOOL_CONFIG_BOOTFS, dsname) == 0); + kmem_free(dsname, MAXPATHLEN); } - - /* - * Add the list of hot spares and level 2 cache devices. - */ - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - spa_add_spares(spa, config); - spa_add_l2cache(spa, config); - spa_config_exit(spa, SCL_CONFIG, FTAG); + kmem_free(tmpname, MAXPATHLEN); } - spa_unload(spa); - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); + /* + * Add the list of hot spares and level 2 cache devices. + */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_add_spares(spa, config); + spa_add_l2cache(spa, config); + spa_config_exit(spa, SCL_CONFIG, FTAG); +} + +spa_unload(spa); +spa_deactivate(spa); +spa_remove(spa); +mutex_exit(&spa_namespace_lock); - return (config); +return (config); } /* - * Pool export/destroy - * - * The act of destroying or exporting a pool is very simple. We make sure there - * is no more pending I/O and any references to the pool are gone. Then, we - * update the pool state and sync all the labels to disk, removing the - * configuration from the cache afterwards. If the 'hardforce' flag is set, then - * we don't sync the labels or remove the configuration cache. 
- */ +* Pool export/destroy +* +* The act of destroying or exporting a pool is very simple. We make sure there +* is no more pending I/O and any references to the pool are gone. Then, we +* update the pool state and sync all the labels to disk, removing the +* configuration from the cache afterwards. If the 'hardforce' flag is set, then +* we don't sync the labels or remove the configuration cache. +*/ static int spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, - boolean_t force, boolean_t hardforce) +boolean_t force, boolean_t hardforce) { - spa_t *spa; - - if (oldconfig) - *oldconfig = NULL; +spa_t *spa; - if (!(spa_mode_global & FWRITE)) - return (SET_ERROR(EROFS)); +if (oldconfig) + *oldconfig = NULL; - mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(pool)) == NULL) { - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOENT)); - } +if (!(spa_mode_global & FWRITE)) + return (SET_ERROR(EROFS)); - if (spa->spa_is_exporting) { - /* the pool is being exported by another thread */ - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS)); - } - spa->spa_is_exporting = B_TRUE; +mutex_enter(&spa_namespace_lock); +if ((spa = spa_lookup(pool)) == NULL) { + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOENT)); +} - /* - * Put a hold on the pool, drop the namespace lock, stop async tasks, - * reacquire the namespace lock, and see if we can export. 
- */ - spa_open_ref(spa, FTAG); +if (spa->spa_is_exporting) { + /* the pool is being exported by another thread */ mutex_exit(&spa_namespace_lock); - spa_async_suspend(spa); - if (spa->spa_zvol_taskq) { - zvol_remove_minors(spa, spa_name(spa), B_TRUE); - taskq_wait(spa->spa_zvol_taskq); - } - mutex_enter(&spa_namespace_lock); - spa_close(spa, FTAG); + return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS)); +} +spa->spa_is_exporting = B_TRUE; - if (spa->spa_state == POOL_STATE_UNINITIALIZED) - goto export_spa; - /* - * The pool will be in core if it's openable, in which case we can - * modify its state. Objsets may be open only because they're dirty, - * so we have to force it to sync before checking spa_refcnt. - */ - if (spa->spa_sync_on) { - txg_wait_synced(spa->spa_dsl_pool, 0); - spa_evicting_os_wait(spa); - } +/* + * Put a hold on the pool, drop the namespace lock, stop async tasks, + * reacquire the namespace lock, and see if we can export. + */ +spa_open_ref(spa, FTAG); +mutex_exit(&spa_namespace_lock); +spa_async_suspend(spa); +if (spa->spa_zvol_taskq) { + zvol_remove_minors(spa, spa_name(spa), B_TRUE); + taskq_wait(spa->spa_zvol_taskq); +} +mutex_enter(&spa_namespace_lock); +spa_close(spa, FTAG); + +if (spa->spa_state == POOL_STATE_UNINITIALIZED) + goto export_spa; +/* + * The pool will be in core if it's openable, in which case we can + * modify its state. Objsets may be open only because they're dirty, + * so we have to force it to sync before checking spa_refcnt. + */ +if (spa->spa_sync_on) { + txg_wait_synced(spa->spa_dsl_pool, 0); + spa_evicting_os_wait(spa); +} + +/* + * A pool cannot be exported or destroyed if there are active + * references. If we are resetting a pool, allow references by + * fault injection handlers. 
+ */ +if (!spa_refcount_zero(spa) || + (spa->spa_inject_ref != 0 && + new_state != POOL_STATE_UNINITIALIZED)) { + spa_async_resume(spa); + spa->spa_is_exporting = B_FALSE; + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EBUSY)); +} +if (spa->spa_sync_on) { /* - * A pool cannot be exported or destroyed if there are active - * references. If we are resetting a pool, allow references by - * fault injection handlers. + * A pool cannot be exported if it has an active shared spare. + * This is to prevent other pools stealing the active spare + * from an exported pool. At user's own will, such pool can + * be forcedly exported. */ - if (!spa_refcount_zero(spa) || - (spa->spa_inject_ref != 0 && - new_state != POOL_STATE_UNINITIALIZED)) { + if (!force && new_state == POOL_STATE_EXPORTED && + spa_has_active_shared_spare(spa)) { spa_async_resume(spa); spa->spa_is_exporting = B_FALSE; mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EBUSY)); + return (SET_ERROR(EXDEV)); } - if (spa->spa_sync_on) { - /* - * A pool cannot be exported if it has an active shared spare. - * This is to prevent other pools stealing the active spare - * from an exported pool. At user's own will, such pool can - * be forcedly exported. - */ - if (!force && new_state == POOL_STATE_EXPORTED && - spa_has_active_shared_spare(spa)) { - spa_async_resume(spa); - spa->spa_is_exporting = B_FALSE; - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EXDEV)); - } - - /* - * We're about to export or destroy this pool. Make sure - * we stop all initialization and trim activity here before - * we set the spa_final_txg. This will ensure that all - * dirty data resulting from the initialization is - * committed to disk before we unload the pool. 
- */ - if (spa->spa_root_vdev != NULL) { - vdev_t *rvd = spa->spa_root_vdev; - vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE); - vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE); - vdev_autotrim_stop_all(spa); - } + /* + * We're about to export or destroy this pool. Make sure + * we stop all initialization and trim activity here before + * we set the spa_final_txg. This will ensure that all + * dirty data resulting from the initialization is + * committed to disk before we unload the pool. + */ + if (spa->spa_root_vdev != NULL) { + vdev_t *rvd = spa->spa_root_vdev; + vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE); + vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE); + vdev_autotrim_stop_all(spa); + } - /* - * We want this to be reflected on every label, - * so mark them all dirty. spa_unload() will do the - * final sync that pushes these changes out. - */ - if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa->spa_state = new_state; - spa->spa_final_txg = spa_last_synced_txg(spa) + - TXG_DEFER_SIZE + 1; - vdev_config_dirty(spa->spa_root_vdev); - spa_config_exit(spa, SCL_ALL, FTAG); - } + /* + * We want this to be reflected on every label, + * so mark them all dirty. spa_unload() will do the + * final sync that pushes these changes out. 
+ */ + if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa->spa_state = new_state; + spa->spa_final_txg = spa_last_synced_txg(spa) + + TXG_DEFER_SIZE + 1; + vdev_config_dirty(spa->spa_root_vdev); + spa_config_exit(spa, SCL_ALL, FTAG); } +} export_spa: - if (new_state == POOL_STATE_DESTROYED) - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY); - else if (new_state == POOL_STATE_EXPORTED) - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_EXPORT); +if (new_state == POOL_STATE_DESTROYED) + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY); +else if (new_state == POOL_STATE_EXPORTED) + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_EXPORT); - if (spa->spa_state != POOL_STATE_UNINITIALIZED) { - spa_unload(spa); - spa_deactivate(spa); - } +if (spa->spa_state != POOL_STATE_UNINITIALIZED) { + spa_unload(spa); + spa_deactivate(spa); +} - if (oldconfig && spa->spa_config) - VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); +if (oldconfig && spa->spa_config) + VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); - if (new_state != POOL_STATE_UNINITIALIZED) { - if (!hardforce) - spa_write_cachefile(spa, B_TRUE, B_TRUE); - spa_remove(spa); - } else { - /* - * If spa_remove() is not called for this spa_t and - * there is any possibility that it can be reused, - * we make sure to reset the exporting flag. - */ - spa->spa_is_exporting = B_FALSE; - } +if (new_state != POOL_STATE_UNINITIALIZED) { + if (!hardforce) + spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_remove(spa); +} else { + /* + * If spa_remove() is not called for this spa_t and + * there is any possibility that it can be reused, + * we make sure to reset the exporting flag. + */ + spa->spa_is_exporting = B_FALSE; +} - mutex_exit(&spa_namespace_lock); - return (0); +mutex_exit(&spa_namespace_lock); +return (0); } /* - * Destroy a storage pool. - */ +* Destroy a storage pool. 
+*/ int spa_destroy(char *pool) { - return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, - B_FALSE, B_FALSE)); +return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, + B_FALSE, B_FALSE)); } /* - * Export a storage pool. - */ +* Export a storage pool. +*/ int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, - boolean_t hardforce) +boolean_t hardforce) { - return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, - force, hardforce)); +return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, + force, hardforce)); } /* - * Similar to spa_export(), this unloads the spa_t without actually removing it - * from the namespace in any way. - */ +* Similar to spa_export(), this unloads the spa_t without actually removing it +* from the namespace in any way. +*/ int spa_reset(char *pool) { - return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, - B_FALSE, B_FALSE)); +return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, + B_FALSE, B_FALSE)); } /* - * ========================================================================== - * Device manipulation - * ========================================================================== - */ +* ========================================================================== +* Device manipulation +* ========================================================================== +*/ /* - * Add a device to a storage pool. - */ +* Add a device to a storage pool. 
+*/ int spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { - uint64_t txg, id; - int error; - vdev_t *rvd = spa->spa_root_vdev; - vdev_t *vd, *tvd; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; +uint64_t txg, id; +int error; +vdev_t *rvd = spa->spa_root_vdev; +vdev_t *vd, *tvd; +nvlist_t **spares, **l2cache; +uint_t nspares, nl2cache; - ASSERT(spa_writeable(spa)); - - txg = spa_vdev_enter(spa); +ASSERT(spa_writeable(spa)); - if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, - VDEV_ALLOC_ADD)) != 0) - return (spa_vdev_exit(spa, NULL, txg, error)); +txg = spa_vdev_enter(spa); - spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ +if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, + VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, NULL, txg, error)); - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, - &nspares) != 0) - nspares = 0; +spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, - &nl2cache) != 0) - nl2cache = 0; +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, + &nspares) != 0) + nspares = 0; - if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) - return (spa_vdev_exit(spa, vd, txg, EINVAL)); +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, + &nl2cache) != 0) + nl2cache = 0; - if (vd->vdev_children != 0 && - (error = vdev_create(vd, txg, B_FALSE)) != 0) - return (spa_vdev_exit(spa, vd, txg, error)); +if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) + return (spa_vdev_exit(spa, vd, txg, EINVAL)); - /* - * We must validate the spares and l2cache devices after checking the - * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 
- */ - if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) - return (spa_vdev_exit(spa, vd, txg, error)); +if (vd->vdev_children != 0 && + (error = vdev_create(vd, txg, B_FALSE)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); - /* - * If we are in the middle of a device removal, we can only add - * devices which match the existing devices in the pool. - * If we are in the middle of a removal, or have some indirect - * vdevs, we can not add raidz toplevels. - */ - if (spa->spa_vdev_removal != NULL || - spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { - for (int c = 0; c < vd->vdev_children; c++) { - tvd = vd->vdev_child[c]; - if (spa->spa_vdev_removal != NULL && - tvd->vdev_ashift != spa->spa_max_ashift) { - return (spa_vdev_exit(spa, vd, txg, EINVAL)); - } - /* Fail if top level vdev is raidz */ - if (tvd->vdev_ops == &vdev_raidz_ops) { - return (spa_vdev_exit(spa, vd, txg, EINVAL)); - } - /* - * Need the top level mirror to be - * a mirror of leaf vdevs only - */ - if (tvd->vdev_ops == &vdev_mirror_ops) { - for (uint64_t cid = 0; - cid < tvd->vdev_children; cid++) { - vdev_t *cvd = tvd->vdev_child[cid]; - if (!cvd->vdev_ops->vdev_op_leaf) { - return (spa_vdev_exit(spa, vd, - txg, EINVAL)); - } - } - } - } - } +/* + * We must validate the spares and l2cache devices after checking the + * children. Otherwise, vdev_inuse() will blindly overwrite the spare. + */ +if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); +/* + * If we are in the middle of a device removal, we can only add + * devices which match the existing devices in the pool. + * If we are in the middle of a removal, or have some indirect + * vdevs, we can not add raidz toplevels. 
+ */ +if (spa->spa_vdev_removal != NULL || + spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { for (int c = 0; c < vd->vdev_children; c++) { - + tvd = vd->vdev_child[c]; + if (spa->spa_vdev_removal != NULL && + tvd->vdev_ashift != spa->spa_max_ashift) { + return (spa_vdev_exit(spa, vd, txg, EINVAL)); + } + /* Fail if top level vdev is raidz */ + if (tvd->vdev_ops == &vdev_raidz_ops) { + return (spa_vdev_exit(spa, vd, txg, EINVAL)); + } /* - * Set the vdev id to the first hole, if one exists. + * Need the top level mirror to be + * a mirror of leaf vdevs only */ - for (id = 0; id < rvd->vdev_children; id++) { - if (rvd->vdev_child[id]->vdev_ishole) { - vdev_free(rvd->vdev_child[id]); - break; + if (tvd->vdev_ops == &vdev_mirror_ops) { + for (uint64_t cid = 0; + cid < tvd->vdev_children; cid++) { + vdev_t *cvd = tvd->vdev_child[cid]; + if (!cvd->vdev_ops->vdev_op_leaf) { + return (spa_vdev_exit(spa, vd, + txg, EINVAL)); + } } } - tvd = vd->vdev_child[c]; - vdev_remove_child(vd, tvd); - tvd->vdev_id = id; - vdev_add_child(rvd, tvd); - vdev_config_dirty(tvd); - } - - if (nspares != 0) { - spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, - ZPOOL_CONFIG_SPARES); - spa_load_spares(spa); - spa->spa_spares.sav_sync = B_TRUE; } +} - if (nl2cache != 0) { - spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, - ZPOOL_CONFIG_L2CACHE); - spa_load_l2cache(spa); - spa->spa_l2cache.sav_sync = B_TRUE; - } +for (int c = 0; c < vd->vdev_children; c++) { /* - * We have to be careful when adding new vdevs to an existing pool. - * If other threads start allocating from these vdevs before we - * sync the config cache, and we lose power, then upon reboot we may - * fail to open the pool because there are DVAs that the config cache - * can't translate. Therefore, we first add the vdevs without - * initializing metaslabs; sync the config cache (via spa_vdev_exit()); - * and then let spa_config_update() initialize the new metaslabs. 
- * - * spa_load() checks for added-but-not-initialized vdevs, so that - * if we lose power at any point in this sequence, the remaining - * steps will be completed the next time we load the pool. + * Set the vdev id to the first hole, if one exists. */ - (void) spa_vdev_exit(spa, vd, txg, 0); + for (id = 0; id < rvd->vdev_children; id++) { + if (rvd->vdev_child[id]->vdev_ishole) { + vdev_free(rvd->vdev_child[id]); + break; + } + } + tvd = vd->vdev_child[c]; + vdev_remove_child(vd, tvd); + tvd->vdev_id = id; + vdev_add_child(rvd, tvd); + vdev_config_dirty(tvd); +} - mutex_enter(&spa_namespace_lock); - spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD); - mutex_exit(&spa_namespace_lock); +if (nspares != 0) { + spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, + ZPOOL_CONFIG_SPARES); + spa_load_spares(spa); + spa->spa_spares.sav_sync = B_TRUE; +} - return (0); +if (nl2cache != 0) { + spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, + ZPOOL_CONFIG_L2CACHE); + spa_load_l2cache(spa); + spa->spa_l2cache.sav_sync = B_TRUE; } /* - * Attach a device to a mirror. The arguments are the path to any device - * in the mirror, and the nvroot for the new device. If the path specifies - * a device that is not mirrored, we automatically insert the mirror vdev. + * We have to be careful when adding new vdevs to an existing pool. + * If other threads start allocating from these vdevs before we + * sync the config cache, and we lose power, then upon reboot we may + * fail to open the pool because there are DVAs that the config cache + * can't translate. Therefore, we first add the vdevs without + * initializing metaslabs; sync the config cache (via spa_vdev_exit()); + * and then let spa_config_update() initialize the new metaslabs. 
* - * If 'replacing' is specified, the new device is intended to replace the - * existing device; in this case the two devices are made into their own - * mirror using the 'replacing' vdev, which is functionally identical to - * the mirror vdev (it actually reuses all the same ops) but has a few - * extra rules: you can't attach to it after it's been created, and upon - * completion of resilvering, the first disk (the one being replaced) - * is automatically detached. + * spa_load() checks for added-but-not-initialized vdevs, so that + * if we lose power at any point in this sequence, the remaining + * steps will be completed the next time we load the pool. */ +(void) spa_vdev_exit(spa, vd, txg, 0); + +mutex_enter(&spa_namespace_lock); +spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); +spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD); +mutex_exit(&spa_namespace_lock); + +return (0); +} + +/* +* Attach a device to a mirror. The arguments are the path to any device +* in the mirror, and the nvroot for the new device. If the path specifies +* a device that is not mirrored, we automatically insert the mirror vdev. +* +* If 'replacing' is specified, the new device is intended to replace the +* existing device; in this case the two devices are made into their own +* mirror using the 'replacing' vdev, which is functionally identical to +* the mirror vdev (it actually reuses all the same ops) but has a few +* extra rules: you can't attach to it after it's been created, and upon +* completion of resilvering, the first disk (the one being replaced) +* is automatically detached. 
+*/ int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) { - uint64_t txg, dtl_max_txg; - ASSERTV(vdev_t *rvd = spa->spa_root_vdev); - vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; - vdev_ops_t *pvops; - char *oldvdpath, *newvdpath; - int newvd_isspare; - int error; +uint64_t txg, dtl_max_txg; +ASSERTV(vdev_t *rvd = spa->spa_root_vdev); +vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; +vdev_ops_t *pvops; +char *oldvdpath, *newvdpath; +int newvd_isspare = B_FALSE; +int error; +boolean_t raidz = B_FALSE; - ASSERT(spa_writeable(spa)); +ASSERT(spa_writeable(spa)); - txg = spa_vdev_enter(spa); +txg = spa_vdev_enter(spa); - oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); +oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { - error = (spa_has_checkpoint(spa)) ? - ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; - return (spa_vdev_exit(spa, NULL, txg, error)); - } +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + error = (spa_has_checkpoint(spa)) ? 
+ ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (spa_vdev_exit(spa, NULL, txg, error)); +} - if (spa->spa_vdev_removal != NULL) - return (spa_vdev_exit(spa, NULL, txg, EBUSY)); +if (spa->spa_vdev_removal != NULL) + return (spa_vdev_exit(spa, NULL, txg, EBUSY)); - if (oldvd == NULL) - return (spa_vdev_exit(spa, NULL, txg, ENODEV)); +if (oldvd == NULL) + return (spa_vdev_exit(spa, NULL, txg, ENODEV)); - if (!oldvd->vdev_ops->vdev_op_leaf) - return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); +if (oldvd->vdev_ops == &vdev_raidz_ops) { + raidz = B_TRUE; +} else if (!oldvd->vdev_ops->vdev_op_leaf) { + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); +} +if (raidz) + pvd = oldvd; +else pvd = oldvd->vdev_parent; - if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, - VDEV_ALLOC_ATTACH)) != 0) - return (spa_vdev_exit(spa, NULL, txg, EINVAL)); +if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, + VDEV_ALLOC_ATTACH)) != 0) + return (spa_vdev_exit(spa, NULL, txg, EINVAL)); + +if (newrootvd->vdev_children != 1) + return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); - if (newrootvd->vdev_children != 1) - return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); +newvd = newrootvd->vdev_child[0]; - newvd = newrootvd->vdev_child[0]; +if (!newvd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); - if (!newvd->vdev_ops->vdev_op_leaf) - return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); +if ((error = vdev_create(newrootvd, txg, replacing)) != 0) + return (spa_vdev_exit(spa, newrootvd, txg, error)); - if ((error = vdev_create(newrootvd, txg, replacing)) != 0) - return (spa_vdev_exit(spa, newrootvd, txg, error)); +/* + * Spares can't replace logs + */ +if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); +if (!replacing) { /* - * Spares can't replace logs + * For attach, the only allowable parent is a mirror or the root + * vdev. 
*/ - if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) + if (pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_raidz_ops && + pvd->vdev_ops != &vdev_root_ops) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - if (!replacing) { - /* - * For attach, the only allowable parent is a mirror or the root - * vdev. - */ - if (pvd->vdev_ops != &vdev_mirror_ops && - pvd->vdev_ops != &vdev_root_ops) - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - - pvops = &vdev_mirror_ops; - } else { - /* - * Active hot spares can only be replaced by inactive hot - * spares. - */ - if (pvd->vdev_ops == &vdev_spare_ops && - oldvd->vdev_isspare && - !spa_has_spare(spa, newvd->vdev_guid)) - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - - /* - * If the source is a hot spare, and the parent isn't already a - * spare, then we want to create a new hot spare. Otherwise, we - * want to create a replacing vdev. The user is not allowed to - * attach to a spared vdev child unless the 'isspare' state is - * the same (spare replaces spare, non-spare replaces - * non-spare). - */ - if (pvd->vdev_ops == &vdev_replacing_ops && - spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - } else if (pvd->vdev_ops == &vdev_spare_ops && - newvd->vdev_isspare != oldvd->vdev_isspare) { - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - } - - if (newvd->vdev_isspare) - pvops = &vdev_spare_ops; - else - pvops = &vdev_replacing_ops; - } - + pvops = &vdev_mirror_ops; +} else { /* - * Make sure the new device is big enough. + * Active hot spares can only be replaced by inactive hot + * spares. 
*/ - if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) - return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); + if (pvd->vdev_ops == &vdev_spare_ops && + oldvd->vdev_isspare && + !spa_has_spare(spa, newvd->vdev_guid)) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); /* - * The new device cannot have a higher alignment requirement - * than the top-level vdev. + * If the source is a hot spare, and the parent isn't already a + * spare, then we want to create a new hot spare. Otherwise, we + * want to create a replacing vdev. The user is not allowed to + * attach to a spared vdev child unless the 'isspare' state is + * the same (spare replaces spare, non-spare replaces + * non-spare). */ - if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) - return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); + if (pvd->vdev_ops == &vdev_replacing_ops && + spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + } else if (pvd->vdev_ops == &vdev_spare_ops && + newvd->vdev_isspare != oldvd->vdev_isspare) { + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + } - /* - * If this is an in-place replacement, update oldvd's path and devid - * to make it distinguishable from newvd, and unopenable from now on. - */ - if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { - spa_strfree(oldvd->vdev_path); - oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, - KM_SLEEP); - (void) sprintf(oldvd->vdev_path, "%s/%s", - newvd->vdev_path, "old"); - if (oldvd->vdev_devid != NULL) { - spa_strfree(oldvd->vdev_devid); - oldvd->vdev_devid = NULL; - } + if (newvd->vdev_isspare) + pvops = &vdev_spare_ops; + else + pvops = &vdev_replacing_ops; +} + +/* + * Make sure the new device is big enough. + */ +vdev_t *min_vdev = raidz ? 
oldvd->vdev_child[0] : oldvd; +if (newvd->vdev_asize < vdev_get_min_asize(min_vdev)) + return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); + +/* + * The new device cannot have a higher alignment requirement + * than the top-level vdev. + */ +if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) + return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); + +if (raidz) { + oldvdpath = kmem_asprintf("raidz%u-%u", + oldvd->vdev_nparity, oldvd->vdev_id); +} else { + oldvdpath = spa_strdup(oldvd->vdev_path); +} +newvdpath = spa_strdup(newvd->vdev_path); + +/* + * If this is an in-place replacement, update oldvd's path and devid + * to make it distinguishable from newvd, and unopenable from now on. + */ +if (strcmp(oldvdpath, newvdpath) == 0) { + spa_strfree(oldvd->vdev_path); + oldvd->vdev_path = kmem_alloc(strlen(newvdpath) + 5, + KM_SLEEP); + (void) sprintf(oldvd->vdev_path, "%s/old", + newvdpath); + if (oldvd->vdev_devid != NULL) { + spa_strfree(oldvd->vdev_devid); + oldvd->vdev_devid = NULL; } + spa_strfree(oldvdpath); + oldvdpath = spa_strdup(oldvd->vdev_path); +} - /* mark the device being resilvered */ +/* mark the device being resilvered */ +if (!raidz) newvd->vdev_resilver_txg = txg; - /* - * If the parent is not a mirror, or if we're replacing, insert the new - * mirror/replacing/spare vdev above oldvd. - */ - if (pvd->vdev_ops != pvops) - pvd = vdev_add_parent(oldvd, pvops); +/* + * If the parent is not a mirror, or if we're replacing, insert the new + * mirror/replacing/spare vdev above oldvd. + */ +if (!raidz && pvd->vdev_ops != pvops) + pvd = vdev_add_parent(oldvd, pvops); - ASSERT(pvd->vdev_top->vdev_parent == rvd); - ASSERT(pvd->vdev_ops == pvops); - ASSERT(oldvd->vdev_parent == pvd); +ASSERT(pvd->vdev_top->vdev_parent == rvd); +#if 0 +ASSERT(pvd->vdev_ops == pvops); +ASSERT(oldvd->vdev_parent == pvd); +#endif - /* - * Extract the new device from its root and add it to pvd. 
- */ - vdev_remove_child(newrootvd, newvd); - newvd->vdev_id = pvd->vdev_children; - newvd->vdev_crtxg = oldvd->vdev_crtxg; - vdev_add_child(pvd, newvd); +/* + * Extract the new device from its root and add it to pvd. + */ +vdev_remove_child(newrootvd, newvd); +newvd->vdev_id = pvd->vdev_children; +newvd->vdev_crtxg = oldvd->vdev_crtxg; +vdev_add_child(pvd, newvd); - /* - * Reevaluate the parent vdev state. - */ - vdev_propagate_state(pvd); +/* + * Reevaluate the parent vdev state. + */ +vdev_propagate_state(pvd); - tvd = newvd->vdev_top; - ASSERT(pvd->vdev_top == tvd); - ASSERT(tvd->vdev_parent == rvd); +tvd = newvd->vdev_top; +ASSERT(pvd->vdev_top == tvd); +ASSERT(tvd->vdev_parent == rvd); - vdev_config_dirty(tvd); +vdev_config_dirty(tvd); - /* - * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account - * for any dmu_sync-ed blocks. It will propagate upward when - * spa_vdev_exit() calls vdev_dtl_reassess(). - */ - dtl_max_txg = txg + TXG_CONCURRENT_STATES; +/* + * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account + * for any dmu_sync-ed blocks. It will propagate upward when + * spa_vdev_exit() calls vdev_dtl_reassess(). + */ +dtl_max_txg = txg + TXG_CONCURRENT_STATES; +if (raidz) { + dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); + dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_raidz_attach_sync, + newvd, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED, tx); + dmu_tx_commit(tx); +} else { vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, dtl_max_txg - TXG_INITIAL); @@ -6225,8 +6263,6 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE); } - oldvdpath = spa_strdup(oldvd->vdev_path); - newvdpath = spa_strdup(newvd->vdev_path); newvd_isspare = newvd->vdev_isspare; /* @@ -6234,17 +6270,18 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ vdev_dirty(tvd, VDD_DTL, newvd, txg); - /* - * Schedule the resilver to restart in the future. 
We do this to - * ensure that dmu_sync-ed blocks have been stitched into the - * respective datasets. We do not do this if resilvers have been - * deferred. - */ - if (dsl_scan_resilvering(spa_get_dsl(spa)) && - spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_defer_resilver(newvd); - else - dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg); + /* + * Schedule the resilver to restart in the future. We do this to + * ensure that dmu_sync-ed blocks have been stitched into the + * respective datasets. We do not do this if resilvers have been + * deferred. + */ + if (dsl_scan_resilvering(spa_get_dsl(spa)) && + spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) + vdev_set_deferred_resilver(spa, newvd); + else + dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); + } if (spa->spa_bootfs) spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH); @@ -6256,6 +6293,20 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); + /* + * XXX since we only initiated the expand, this should be done + * when it finishes, instead + * XXX this seems to work before the expansion completes, + * but shit will probably break if we try to write to the new + * space. Need to modify vdev_online() / size-calculating code + * to ignore new device if expansion not yet completed. + */ +#if 0 + if (raidz) { + error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, NULL); + } +#endif + spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? 
"spare in" : @@ -7530,6 +7581,10 @@ spa_async_suspend(spa_t *spa) if (condense_thread != NULL) zthr_cancel(condense_thread); + zthr_t *raidz_expand_thread = spa->spa_raidz_expand_zthr; + if (raidz_expand_thread != NULL) + zthr_cancel(raidz_expand_thread); + zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; if (discard_thread != NULL) zthr_cancel(discard_thread); @@ -7548,6 +7603,10 @@ spa_async_resume(spa_t *spa) if (condense_thread != NULL) zthr_resume(condense_thread); + zthr_t *raidz_expand_thread = spa->spa_raidz_expand_zthr; + if (raidz_expand_thread != NULL) + zthr_resume(raidz_expand_thread); + zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; if (discard_thread != NULL) zthr_resume(discard_thread); @@ -8262,6 +8321,28 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx) != NULL) vdev_sync(vd, txg); + if (pass == 1) { + /* + * dsl_pool_sync() -> dp_sync_tasks may have dirtied + * the config. If that happens, we don't want this + * txg to be able to be a no-op, so be sure to sync + * the config to the MOS before checking for no-op + * txg below. + * + * Note that when the config is dirty, it will + * be written to the MOS (i.e. the MOS will be + * dirtied) every time we call spa_sync_config_object() + * in this txg. Therefore we can't call this after + * dsl_pool_sync() every pass, because it would + * prevent us from converging, since we'd dirty + * the MOS every pass. + * + * Sync tasks can only be processed in pass 1, so + * there's no need to do this in later passes. 
+ */ + spa_sync_config_object(spa, tx); + } + /* * Note: We need to check if the MOS is dirty because we could * have marked the MOS dirty without updating the uberblock diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index ecdb3c61519..7edcf2b3295 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2299,7 +2299,7 @@ spa_init(int mode) zil_init(); vdev_cache_stat_init(); vdev_mirror_stat_init(); - vdev_raidz_math_init(); + /* vdev_raidz_math_init(); */ vdev_file_init(); zfs_prop_init(); zpool_prop_init(); @@ -2321,7 +2321,7 @@ spa_fini(void) vdev_file_fini(); vdev_cache_stat_fini(); vdev_mirror_stat_fini(); - vdev_raidz_math_fini(); + /* vdev_raidz_math_fini(); */ zil_fini(); dmu_fini(); zio_fini(); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index a68c0dfa737..84551f92a16 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -586,7 +587,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, { vdev_ops_t *ops; char *type; - uint64_t guid = 0, islog, nparity; + uint64_t guid = 0, islog; vdev_t *vd; vdev_indirect_config_t *vic; char *tmp = NULL; @@ -643,41 +644,14 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES) return (SET_ERROR(ENOTSUP)); - /* - * Set the nparity property for RAID-Z vdevs. - */ - nparity = -1ULL; + void *tsd = NULL; + int nparity = 0; if (ops == &vdev_raidz_ops) { - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, - &nparity) == 0) { - if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY) - return (SET_ERROR(EINVAL)); - /* - * Previous versions could only support 1 or 2 parity - * device. 
- */ - if (nparity > 1 && - spa_version(spa) < SPA_VERSION_RAIDZ2) - return (SET_ERROR(ENOTSUP)); - if (nparity > 2 && - spa_version(spa) < SPA_VERSION_RAIDZ3) - return (SET_ERROR(ENOTSUP)); - } else { - /* - * We require the parity to be specified for SPAs that - * support multiple parity levels. - */ - if (spa_version(spa) >= SPA_VERSION_RAIDZ2) - return (SET_ERROR(EINVAL)); - /* - * Otherwise, we default to 1 parity device for RAID-Z. - */ - nparity = 1; - } - } else { - nparity = 0; + vdev_raidz_t *rz = tsd = vdev_raidz_get_tsd(spa, nv); + if (rz == NULL) + return (SET_ERROR(EINVAL)); + nparity = rz->vd_nparity; } - ASSERT(nparity != -1ULL); /* * If creating a top-level vdev, check for allocation classes input @@ -705,6 +679,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, vd->vdev_nparity = nparity; if (top_level && alloc_bias != VDEV_BIAS_NONE) vd->vdev_alloc_bias = alloc_bias; + vd->vdev_tsd = tsd; if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) vd->vdev_path = spa_strdup(vd->vdev_path); @@ -919,6 +894,11 @@ vdev_free(vdev_t *vd) ASSERT(vd->vdev_child == NULL); ASSERT(vd->vdev_guid_sum == vd->vdev_guid); + if (vd->vdev_ops == &vdev_raidz_ops) { + vdev_raidz_t *rz = vd->vdev_tsd; + kmem_free(rz, sizeof (*rz)); + } + /* * Discard allocation state. 
*/ @@ -3013,6 +2993,12 @@ vdev_load(vdev_t *vd) vdev_set_deflate_ratio(vd); + if (vd->vdev_ops == &vdev_raidz_ops) { + error = vdev_raidz_load(vd); + if (error != 0) + return (error); + } + /* * On spa_load path, grab the allocation bias from our zap */ @@ -3444,8 +3430,10 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) return (spa_vdev_state_exit(spa, NULL, ENODEV)); +#if 0 if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); +#endif wasoffline = (vd->vdev_offline || vd->vdev_tmpoffline); oldstate = vd->vdev_state; diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 6320732ed6d..4919ac71ce5 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -142,6 +142,7 @@ #include #include #include +#include #include #include #include @@ -402,6 +403,13 @@ root_vdev_actions_getprogress(vdev_t *vd, nvlist_t *nvl) ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t *)&pcs, sizeof (pcs) / sizeof (uint64_t)); } + + pool_raidz_expand_stat_t pres; + if (spa_raidz_expand_get_stats(spa, &pres) == 0) { + fnvlist_add_uint64_array(nvl, + ZPOOL_CONFIG_RAIDZ_EXPAND_STATS, (uint64_t *)&pres, + sizeof (pres) / sizeof (uint64_t)); + } } /* @@ -438,31 +446,13 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_fru != NULL) fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru); - if (vd->vdev_nparity != 0) { - ASSERT(strcmp(vd->vdev_ops->vdev_op_type, - VDEV_TYPE_RAIDZ) == 0); - - /* - * Make sure someone hasn't managed to sneak a fancy new vdev - * into a crufty old storage pool. 
- */ - ASSERT(vd->vdev_nparity == 1 || - (vd->vdev_nparity <= 2 && - spa_version(spa) >= SPA_VERSION_RAIDZ2) || - (vd->vdev_nparity <= 3 && - spa_version(spa) >= SPA_VERSION_RAIDZ3)); + if (vd->vdev_ops == &vdev_raidz_ops) + vdev_raidz_config_generate(vd, nv); - /* - * Note that we'll add the nparity tag even on storage pools - * that only support a single parity device -- older software - * will just ignore it. - */ - fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity); - } - - if (vd->vdev_wholedisk != -1ULL) + if (vd->vdev_wholedisk != -1ULL) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, vd->vdev_wholedisk); + } if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING)) fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index f63ccaa94cb..ee768bd8b70 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -27,10 +27,15 @@ #include #include +#include +#include #include +#include #include #include +#include #include +#include #include #include #include @@ -134,25 +139,121 @@ VDEV_RAIDZ_64MUL_2((x), mask); \ } -void -vdev_raidz_map_free(raidz_map_t *rm) +/* Powers of 2 in the RAID-Z Galois field. 
*/ +const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, + 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, + 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, + 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, + 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, + 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, + 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, + 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, + 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, + 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, + 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, + 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, + 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, + 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, + 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, + 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, + 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, + 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, + 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, + 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, + 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, + 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, + 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, + 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, + 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, + 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, + 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, + 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, + 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, + 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, + 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 +}; + +/* Logs of 2 in the RAID-Z Galois field. 
*/ +const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = { + 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, + 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, + 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, + 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, + 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, + 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, + 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, + 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, + 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, + 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, + 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, + 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, + 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, + 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, + 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, + 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, + 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, + 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, + 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, + 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, + 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, + 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, + 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, + 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, + 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, + 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, + 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, + 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, + 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, + 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, + 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, + 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, +}; + +uint64_t zfs_raidz_expand_max_offset_pause = UINT64_MAX; + +uint64_t zfs_raidz_expand_max_copy_bytes = 10 * SPA_MAXBLOCKSIZE; + +static void +vdev_raidz_row_free(raidz_row_t *rr) { int c; - for (c = 0; c < rm->rm_firstdatacol; c++) { - abd_free(rm->rm_col[c].rc_abd); + for (c = 0; c < rr->rr_firstdatacol && c < 
rr->rr_cols; c++) { + abd_free(rr->rr_col[c].rc_abd); - if (rm->rm_col[c].rc_gdata != NULL) - abd_free(rm->rm_col[c].rc_gdata); + if (rr->rr_col[c].rc_gdata != NULL) { + abd_free(rr->rr_col[c].rc_gdata); + } + if (rr->rr_col[c].rc_orig_data != NULL) { + zio_buf_free(rr->rr_col[c].rc_orig_data, + rr->rr_col[c].rc_size); + } + } + for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + if (rr->rr_col[c].rc_size != 0) + abd_put(rr->rr_col[c].rc_abd); + if (rr->rr_col[c].rc_orig_data != NULL) { + zio_buf_free(rr->rr_col[c].rc_orig_data, + rr->rr_col[c].rc_size); + } } - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) - abd_put(rm->rm_col[c].rc_abd); + if (rr->rr_abd_copy != NULL) + abd_free(rr->rr_abd_copy); - if (rm->rm_abd_copy != NULL) - abd_free(rm->rm_abd_copy); + kmem_free(rr, offsetof(raidz_row_t, rr_col[rr->rr_cols])); +} - kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols])); +void +vdev_raidz_map_free(raidz_map_t *rm) +{ + for (int i = 0; i < rm->rm_nrows; i++) { + vdev_raidz_row_free(rm->rm_row[i]); + } + ASSERT3P(rm->rm_lr, ==, NULL); + kmem_free(rm, offsetof(raidz_map_t, rm_row[rm->rm_nrows])); } static void @@ -161,10 +262,11 @@ vdev_raidz_map_free_vsd(zio_t *zio) raidz_map_t *rm = zio->io_vsd; ASSERT0(rm->rm_freed); - rm->rm_freed = 1; + rm->rm_freed = B_TRUE; - if (rm->rm_reports == 0) + if (rm->rm_reports == 0) { vdev_raidz_map_free(rm); + } } /*ARGSUSED*/ @@ -175,7 +277,7 @@ vdev_raidz_cksum_free(void *arg, size_t ignored) ASSERT3U(rm->rm_reports, >, 0); - if (--rm->rm_reports == 0 && rm->rm_freed != 0) + if (--rm->rm_reports == 0 && rm->rm_freed) vdev_raidz_map_free(rm); } @@ -183,24 +285,28 @@ static void vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) { raidz_map_t *rm = zcr->zcr_cbdata; - const size_t c = zcr->zcr_cbinfo; - size_t x, offset; - - const abd_t *good = NULL; - const abd_t *bad = rm->rm_col[c].rc_abd; + zfs_dbgmsg("checksum error on rm=%p", rm); if (good_data == NULL) { 
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); return; } - if (c < rm->rm_firstdatacol) { + zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); +#if 0 + const size_t c = zcr->zcr_cbinfo; + size_t x, offset; + + const abd_t *good = NULL; + const abd_t *bad = rm->rm_col[c].rc_abd; + + if (c < rm->rr_firstdatacol) { /* * The first time through, calculate the parity blocks for * the good data (this relies on the fact that the good * data never changes for a given logical ZIO) */ - if (rm->rm_col[0].rc_gdata == NULL) { + if (rm->rr_col[0].rc_gdata == NULL) { abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; /* @@ -208,23 +314,23 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) * good_data, first saving the parity bufs and * replacing them with buffers to hold the result. */ - for (x = 0; x < rm->rm_firstdatacol; x++) { - bad_parity[x] = rm->rm_col[x].rc_abd; - rm->rm_col[x].rc_abd = - rm->rm_col[x].rc_gdata = - abd_alloc_sametype(rm->rm_col[x].rc_abd, - rm->rm_col[x].rc_size); + for (x = 0; x < rm->rr_firstdatacol; x++) { + bad_parity[x] = rm->rr_col[x].rc_abd; + rm->rr_col[x].rc_abd = + rm->rr_col[x].rc_gdata = + abd_alloc_sametype(rm->rr_col[x].rc_abd, + rm->rr_col[x].rc_size); } /* fill in the data columns from good_data */ offset = 0; - for (; x < rm->rm_cols; x++) { - abd_put(rm->rm_col[x].rc_abd); + for (; x < rm->rr_cols; x++) { + abd_put(rm->rr_col[x].rc_abd); - rm->rm_col[x].rc_abd = + rm->rr_col[x].rc_abd = abd_get_offset_size((abd_t *)good_data, - offset, rm->rm_col[x].rc_size); - offset += rm->rm_col[x].rc_size; + offset, rm->rr_col[x].rc_size); + offset += rm->rr_col[x].rc_size; } /* @@ -233,27 +339,27 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) vdev_raidz_generate_parity(rm); /* restore everything back to its original state */ - for (x = 0; x < rm->rm_firstdatacol; x++) - rm->rm_col[x].rc_abd = bad_parity[x]; + for (x = 0; x < rm->rr_firstdatacol; x++) + rm->rr_col[x].rc_abd = bad_parity[x]; 
offset = 0; - for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { - abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_offset_size( - rm->rm_abd_copy, offset, - rm->rm_col[x].rc_size); - offset += rm->rm_col[x].rc_size; + for (x = rm->rr_firstdatacol; x < rm->rr_cols; x++) { + abd_put(rm->rr_col[x].rc_abd); + rm->rr_col[x].rc_abd = abd_get_offset_size( + rm->rr_abd_copy, offset, + rm->rr_col[x].rc_size); + offset += rm->rr_col[x].rc_size; } } - ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL); - good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0, - rm->rm_col[c].rc_size); + ASSERT3P(rm->rr_col[c].rc_gdata, !=, NULL); + good = abd_get_offset_size(rm->rr_col[c].rc_gdata, 0, + rm->rr_col[c].rc_size); } else { /* adjust good_data to point at the start of our column */ offset = 0; - for (x = rm->rm_firstdatacol; x < c; x++) - offset += rm->rm_col[x].rc_size; + for (x = rm->rr_firstdatacol; x < c; x++) + offset += rm->rr_col[x].rc_size; good = abd_get_offset_size((abd_t *)good_data, offset, rm->rm_col[c].rc_size); @@ -262,6 +368,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) /* we drop the ereport if it ends up that the data was good */ zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); abd_put((abd_t *)good); +#endif } /* @@ -274,10 +381,7 @@ static void vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) { size_t c = (size_t)(uintptr_t)arg; - size_t offset; - raidz_map_t *rm = zio->io_vsd; - size_t size; /* set up the report and bump the refcount */ zcr->zcr_cbdata = rm; @@ -288,7 +392,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) rm->rm_reports++; ASSERT3U(rm->rm_reports, >, 0); - if (rm->rm_abd_copy != NULL) + if (rm->rm_row[0]->rr_abd_copy != NULL) return; /* @@ -300,25 +404,34 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) * to copy them. 
*/ - size = 0; - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) - size += rm->rm_col[c].rc_size; + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + size_t offset; + size_t size = 0; + for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) + size += rr->rr_col[c].rc_size; + + rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE); - rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE); + for (offset = 0, c = rr->rr_firstdatacol; + c < rr->rr_cols; c++) { + raidz_col_t *col = &rr->rr_col[c]; + + if (col->rc_size == 0) + continue; - for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - raidz_col_t *col = &rm->rm_col[c]; - abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset, - col->rc_size); + abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy, + offset, col->rc_size); - abd_copy(tmp, col->rc_abd, col->rc_size); + abd_copy(tmp, col->rc_abd, col->rc_size); - abd_put(col->rc_abd); - col->rc_abd = tmp; + abd_put(col->rc_abd); + col->rc_abd = tmp; - offset += col->rc_size; + offset += col->rc_size; + } + ASSERT3U(offset, ==, size); } - ASSERT3U(offset, ==, size); } static const zio_vsd_ops_t vdev_raidz_vsd_ops = { @@ -337,7 +450,7 @@ noinline raidz_map_t * vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, uint64_t nparity) { - raidz_map_t *rm; + raidz_row_t *rr; /* The starting RAIDZ (parent) vdev sector of the block. */ uint64_t b = zio->io_offset >> ashift; /* The zio's size in units of the vdev's minimum sector size. */ @@ -346,9 +459,13 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, uint64_t f = b % dcols; /* The starting byte offset on each child vdev. 
*/ uint64_t o = (b / dcols) << ashift; - uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; + uint64_t q, r, c, bc, col, acols, coff, devidx, asize, tot; uint64_t off = 0; + raidz_map_t *rm = + kmem_zalloc(offsetof(raidz_map_t, rm_row[1]), KM_SLEEP); + rm->rm_nrows = 1; + /* * "Quotient": The number of data sectors for this stripe on all but * the "big column" child vdevs that also contain "remainder" data. @@ -371,77 +488,64 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, tot = s + nparity * (q + (r == 0 ? 0 : 1)); /* acols: The columns that will be accessed. */ - /* scols: The columns that will be accessed or skipped. */ if (q == 0) { /* Our I/O request doesn't span all child vdevs. */ acols = bc; - scols = MIN(dcols, roundup(bc, nparity + 1)); } else { acols = dcols; - scols = dcols; } - ASSERT3U(acols, <=, scols); - - rm = kmem_alloc(offsetof(raidz_map_t, rm_col[scols]), KM_SLEEP); + rr = kmem_alloc(offsetof(raidz_row_t, rr_col[acols]), KM_SLEEP); + rm->rm_row[0] = rr; - rm->rm_cols = acols; - rm->rm_scols = scols; - rm->rm_bigcols = bc; - rm->rm_skipstart = bc; - rm->rm_missingdata = 0; - rm->rm_missingparity = 0; - rm->rm_firstdatacol = nparity; - rm->rm_abd_copy = NULL; - rm->rm_reports = 0; - rm->rm_freed = 0; - rm->rm_ecksuminjected = 0; + rr->rr_cols = acols; + rr->rr_missingdata = 0; + rr->rr_missingparity = 0; + rr->rr_firstdatacol = nparity; + rr->rr_abd_copy = NULL; asize = 0; - for (c = 0; c < scols; c++) { + for (c = 0; c < acols; c++) { col = f + c; coff = o; if (col >= dcols) { col -= dcols; coff += 1ULL << ashift; } - rm->rm_col[c].rc_devidx = col; - rm->rm_col[c].rc_offset = coff; - rm->rm_col[c].rc_abd = NULL; - rm->rm_col[c].rc_gdata = NULL; - rm->rm_col[c].rc_error = 0; - rm->rm_col[c].rc_tried = 0; - rm->rm_col[c].rc_skipped = 0; - - if (c >= acols) - rm->rm_col[c].rc_size = 0; - else if (c < bc) - rm->rm_col[c].rc_size = (q + 1) << ashift; + rr->rr_col[c].rc_devidx = col; + rr->rr_col[c].rc_offset = 
coff; + rr->rr_col[c].rc_abd = NULL; + rr->rr_col[c].rc_gdata = NULL; + rr->rr_col[c].rc_orig_data = NULL; + rr->rr_col[c].rc_error = 0; + rr->rr_col[c].rc_tried = 0; + rr->rr_col[c].rc_skipped = 0; + rr->rr_col[c].rc_need_orig_restore = B_FALSE; + + if (c < bc) + rr->rr_col[c].rc_size = (q + 1) << ashift; else - rm->rm_col[c].rc_size = q << ashift; + rr->rr_col[c].rc_size = q << ashift; - asize += rm->rm_col[c].rc_size; + asize += rr->rr_col[c].rc_size; } ASSERT3U(asize, ==, tot << ashift); - rm->rm_asize = roundup(asize, (nparity + 1) << ashift); rm->rm_nskip = roundup(tot, nparity + 1) - tot; - ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << ashift); - ASSERT3U(rm->rm_nskip, <=, nparity); - for (c = 0; c < rm->rm_firstdatacol; c++) - rm->rm_col[c].rc_abd = - abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE); + for (c = 0; c < rr->rr_firstdatacol; c++) + rr->rr_col[c].rc_abd = + abd_alloc_linear(rr->rr_col[c].rc_size, B_FALSE); - rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0, - rm->rm_col[c].rc_size); - off = rm->rm_col[c].rc_size; + rr->rr_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0, + rr->rr_col[c].rc_size); + off = rr->rr_col[c].rc_size; for (c = c + 1; c < acols; c++) { - rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, off, - rm->rm_col[c].rc_size); - off += rm->rm_col[c].rc_size; + rr->rr_col[c].rc_abd = abd_get_offset_size(zio->io_abd, off, + rr->rr_col[c].rc_size); + off += rr->rr_col[c].rc_size; } /* @@ -464,26 +568,204 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, * skip the first column since at least one data and one parity * column must appear in each row. 
*/ - ASSERT(rm->rm_cols >= 2); - ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size); + ASSERT(rr->rr_cols >= 2); + ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size); + + if (rr->rr_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) { + devidx = rr->rr_col[0].rc_devidx; + o = rr->rr_col[0].rc_offset; + rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx; + rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset; + rr->rr_col[1].rc_devidx = devidx; + rr->rr_col[1].rc_offset = o; + } - if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) { - devidx = rm->rm_col[0].rc_devidx; - o = rm->rm_col[0].rc_offset; - rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx; - rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset; - rm->rm_col[1].rc_devidx = devidx; - rm->rm_col[1].rc_offset = o; + /* init RAIDZ parity ops */ + // rm->rm_ops = vdev_raidz_math_get_ops(); + return (rm); +} - if (rm->rm_skipstart == 0) - rm->rm_skipstart = 1; - } +/* + * If reflow is not in progress, reflow_offset should be UINT64_MAX. + * For each row, if the row is entirely before reflow_offset, it will + * come from the new location. Otherwise this row will come from the + * old location. Therefore, rows that straddle the reflow_offset will + * come from the old location. + */ +static raidz_map_t * +vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset, + uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols, + uint64_t nparity, uint64_t reflow_offset) +{ + /* The zio's size in units of the vdev's minimum sector size. */ + uint64_t s = size >> ashift; + uint64_t q, r, bc, devidx, asize, tot; - zio->io_vsd = rm; - zio->io_vsd_ops = &vdev_raidz_vsd_ops; + /* + * "Quotient": The number of data sectors for this stripe on all but + * the "big column" child vdevs that also contain "remainder" data. + * AKA "full rows" + */ + q = s / (logical_cols - nparity); + + /* + * "Remainder": The number of partial stripe data sectors in this I/O. 
+ * This will add a sector to some, but not all, child vdevs. + */ + r = s - q * (logical_cols - nparity); + + /* The number of "big columns" - those which contain remainder data. */ + bc = (r == 0 ? 0 : r + nparity); + + /* + * The total number of data and parity sectors associated with + * this I/O. + */ + tot = s + nparity * (q + (r == 0 ? 0 : 1)); + + /* How many rows contain data (not skip) */ + uint64_t rows = howmany(tot, logical_cols); + int cols = MIN(tot, logical_cols); + + raidz_map_t *rm = + kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]), + KM_SLEEP); + rm->rm_nrows = rows; + rm->rm_nskip = roundup(tot, nparity + 1) - tot; + asize = 0; + + zfs_dbgmsg("rm=%p s=%d q=%d r=%d bc=%d nrows=%d cols=%d rfo=%llx", + rm, (int)s, (int)q, (int)r, (int)bc, (int)rows, (int)cols, + (long long)reflow_offset); + + for (uint64_t row = 0; row < rows; row++) { + raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t, + rr_col[cols]), KM_SLEEP); + rm->rm_row[row] = rr; + + /* The starting RAIDZ (parent) vdev sector of the row. */ + uint64_t b = (offset >> ashift) + row * logical_cols; + + /* + * If we are in the middle of a reflow, and any part of this + * row has not been copied, then use the old location of + * this row. + */ + int row_phys_cols = physical_cols; + if (b + (logical_cols - nparity) > reflow_offset >> ashift) + row_phys_cols--; + + /* starting child of this row */ + uint64_t child_id = b % row_phys_cols; + /* The starting byte offset on each child vdev. */ + uint64_t child_offset = (b / row_phys_cols) << ashift; + + /* + * We set cols to the entire width of the block, even + * if this row is shorter. This is needed because parity + * generation (for Q and R) needs to know the entire width, + * because it treats the short row as though it was + * full-width (and the "phantom" sectors were zero-filled). 
+ * + * Another approach to this would be to set cols shorter + * (to just the number of columns that we might do i/o to) + * and have another mechanism to tell the parity generation + * about the "entire width". Reconstruction (at least + * vdev_raidz_reconstruct_general()) would also need to + * know about the "entire width". + */ + rr->rr_cols = cols; + rr->rr_missingdata = 0; + rr->rr_missingparity = 0; + rr->rr_firstdatacol = nparity; + rr->rr_abd_copy = NULL; + + for (int c = 0; c < rr->rr_cols; c++, child_id++) { + if (child_id >= row_phys_cols) { + child_id -= row_phys_cols; + child_offset += 1ULL << ashift; + } + rr->rr_col[c].rc_devidx = child_id; + rr->rr_col[c].rc_offset = child_offset; + rr->rr_col[c].rc_gdata = NULL; + rr->rr_col[c].rc_orig_data = NULL; + rr->rr_col[c].rc_error = 0; + rr->rr_col[c].rc_tried = 0; + rr->rr_col[c].rc_skipped = 0; + rr->rr_col[c].rc_need_orig_restore = B_FALSE; + + uint64_t dc = c - rr->rr_firstdatacol; + if (c < rr->rr_firstdatacol) { + rr->rr_col[c].rc_size = 1ULL << ashift; + rr->rr_col[c].rc_abd = + abd_alloc_linear(rr->rr_col[c].rc_size, + B_TRUE); + } else if (row == rows - 1 && bc != 0 && c >= bc) { + /* + * Past the end, this for parity generation. 
+ */ + rr->rr_col[c].rc_size = 0; + rr->rr_col[c].rc_abd = NULL; + } else { + /* XXX ASCII art diagram here */ + /* "data column" (col excluding parity) */ + uint64_t off; + + if (c < bc || r == 0) { + off = dc * rows + row; + } else { + off = r * rows + + (dc - r) * (rows - 1) + row; + } + zfs_dbgmsg("rm=%p row=%d c=%d dc=%d off=%u " + "devidx=%u rpc=%u", + rm, (int)row, (int)c, (int)dc, (int)off, + (int)child_id, (int)row_phys_cols); + rr->rr_col[c].rc_size = 1ULL << ashift; + rr->rr_col[c].rc_abd = + abd_get_offset(abd, off << ashift); + } + + asize += rr->rr_col[c].rc_size; + } + + /* + * If all data stored spans all columns, there's a danger that parity + * will always be on the same device and, since parity isn't read + * during normal operation, that that device's I/O bandwidth won't be + * used effectively. We therefore switch the parity every 1MB. + * + * ... at least that was, ostensibly, the theory. As a practical + * matter unless we juggle the parity between all devices evenly, we + * won't see any benefit. Further, occasional writes that aren't a + * multiple of the LCM of the number of children and the minimum + * stripe width are sufficient to avoid pessimal behavior. + * Unfortunately, this decision created an implicit on-disk format + * requirement that we need to support for all eternity, but only + * for single-parity RAID-Z. + * + * If we intend to skip a sector in the zeroth column for padding + * we must make sure to note this swap. We will never intend to + * skip the first column since at least one data and one parity + * column must appear in each row. 
+ */ + if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 && + (offset & (1ULL << 20))) { + ASSERT(rr->rr_cols >= 2); + ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size); + devidx = rr->rr_col[0].rc_devidx; + uint64_t o = rr->rr_col[0].rc_offset; + rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx; + rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset; + rr->rr_col[1].rc_devidx = devidx; + rr->rr_col[1].rc_offset = o; + } + + } + ASSERT3U(asize, ==, tot << ashift); /* init RAIDZ parity ops */ - rm->rm_ops = vdev_raidz_math_get_ops(); + // rm->rm_ops = vdev_raidz_math_get_ops(); return (rm); } @@ -550,50 +832,43 @@ vdev_raidz_pqr_func(void *buf, size_t size, void *private) } static void -vdev_raidz_generate_parity_p(raidz_map_t *rm) +vdev_raidz_generate_parity_p(raidz_row_t *rr) { - uint64_t *p; - int c; - abd_t *src; + uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd); - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - src = rm->rm_col[c].rc_abd; - p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); + for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + abd_t *src = rr->rr_col[c].rc_abd; - if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); + if (c == rr->rr_firstdatacol) { + abd_copy_to_buf(p, src, rr->rr_col[c].rc_size); } else { struct pqr_struct pqr = { p, NULL, NULL }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + (void) abd_iterate_func(src, 0, rr->rr_col[c].rc_size, vdev_raidz_p_func, &pqr); } } } static void -vdev_raidz_generate_parity_pq(raidz_map_t *rm) +vdev_raidz_generate_parity_pq(raidz_row_t *rr) { - uint64_t *p, *q, pcnt, ccnt, mask, i; - int c; - abd_t *src; - - pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); - ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == - rm->rm_col[VDEV_RAIDZ_Q].rc_size); + uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd); + uint64_t *q = abd_to_buf(rr->rr_col[VDEV_RAIDZ_Q].rc_abd); + uint64_t pcnt = rr->rr_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); + 
ASSERT(rr->rr_col[VDEV_RAIDZ_P].rc_size == + rr->rr_col[VDEV_RAIDZ_Q].rc_size); - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - src = rm->rm_col[c].rc_abd; - p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); + for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + abd_t *src = rr->rr_col[c].rc_abd; - ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); + uint64_t ccnt = rr->rr_col[c].rc_size / sizeof (p[0]); - if (c == rm->rm_firstdatacol) { + if (c == rr->rr_firstdatacol) { ASSERT(ccnt == pcnt || ccnt == 0); - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); - (void) memcpy(q, p, rm->rm_col[c].rc_size); + abd_copy_to_buf(p, src, rr->rr_col[c].rc_size); + (void) memcpy(q, p, rr->rr_col[c].rc_size); - for (i = ccnt; i < pcnt; i++) { + for (uint64_t i = ccnt; i < pcnt; i++) { p[i] = 0; q[i] = 0; } @@ -601,14 +876,15 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm) struct pqr_struct pqr = { p, q, NULL }; ASSERT(ccnt <= pcnt); - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + (void) abd_iterate_func(src, 0, rr->rr_col[c].rc_size, vdev_raidz_pq_func, &pqr); /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. 
*/ - for (i = ccnt; i < pcnt; i++) { + uint64_t mask; + for (uint64_t i = ccnt; i < pcnt; i++) { VDEV_RAIDZ_64MUL_2(q[i], mask); } } @@ -616,33 +892,34 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm) } static void -vdev_raidz_generate_parity_pqr(raidz_map_t *rm) +vdev_raidz_generate_parity_pqr(raidz_row_t *rr) { - uint64_t *p, *q, *r, pcnt, ccnt, mask, i; - int c; - abd_t *src; - - pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); - ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == - rm->rm_col[VDEV_RAIDZ_Q].rc_size); - ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == - rm->rm_col[VDEV_RAIDZ_R].rc_size); + uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd); + uint64_t *q = abd_to_buf(rr->rr_col[VDEV_RAIDZ_Q].rc_abd); + uint64_t *r = abd_to_buf(rr->rr_col[VDEV_RAIDZ_R].rc_abd); + uint64_t pcnt = rr->rr_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); + ASSERT(rr->rr_col[VDEV_RAIDZ_P].rc_size == + rr->rr_col[VDEV_RAIDZ_Q].rc_size); + ASSERT(rr->rr_col[VDEV_RAIDZ_P].rc_size == + rr->rr_col[VDEV_RAIDZ_R].rc_size); - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - src = rm->rm_col[c].rc_abd; - p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); - r = abd_to_buf(rm->rm_col[VDEV_RAIDZ_R].rc_abd); + for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + abd_t *src = rr->rr_col[c].rc_abd; - ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); + uint64_t ccnt = rr->rr_col[c].rc_size / sizeof (p[0]); - if (c == rm->rm_firstdatacol) { + if (c == rr->rr_firstdatacol) { ASSERT(ccnt == pcnt || ccnt == 0); - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); - (void) memcpy(q, p, rm->rm_col[c].rc_size); - (void) memcpy(r, p, rm->rm_col[c].rc_size); + abd_copy_to_buf(p, src, rr->rr_col[c].rc_size); + (void) memcpy(q, p, rr->rr_col[c].rc_size); + (void) memcpy(r, p, rr->rr_col[c].rc_size); - for (i = ccnt; i < pcnt; i++) { + for (uint64_t i = ccnt; i < pcnt; i++) { + /* + * XXX does this really happen? 
+ * firstdatacol should be the same size as + * the parity cols + */ p[i] = 0; q[i] = 0; r[i] = 0; @@ -651,14 +928,15 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm) struct pqr_struct pqr = { p, q, r }; ASSERT(ccnt <= pcnt); - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + (void) abd_iterate_func(src, 0, rr->rr_col[c].rc_size, vdev_raidz_pqr_func, &pqr); /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. */ - for (i = ccnt; i < pcnt; i++) { + uint64_t mask; + for (uint64_t i = ccnt; i < pcnt; i++) { VDEV_RAIDZ_64MUL_2(q[i], mask); VDEV_RAIDZ_64MUL_4(r[i], mask); } @@ -670,28 +948,49 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm) * Generate RAID parity in the first virtual columns according to the number of * parity columns available. */ -void -vdev_raidz_generate_parity(raidz_map_t *rm) +static void +vdev_raidz_generate_parity_row(raidz_row_t *rr) { + if (rr->rr_cols == 0) { + /* + * We are handling this block one row at a time (because + * this block has a different logical vs physical width, + * due to RAIDZ expansion), and this is a pad-only row, + * which has no parity. 
+ */ + return; + } + +#if 0 /* Generate using the new math implementation */ if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL) return; +#endif - switch (rm->rm_firstdatacol) { + switch (rr->rr_firstdatacol) { case 1: - vdev_raidz_generate_parity_p(rm); + vdev_raidz_generate_parity_p(rr); break; case 2: - vdev_raidz_generate_parity_pq(rm); + vdev_raidz_generate_parity_pq(rr); break; case 3: - vdev_raidz_generate_parity_pqr(rm); + vdev_raidz_generate_parity_pqr(rr); break; default: cmn_err(CE_PANIC, "invalid RAID-Z configuration"); } } +void +vdev_raidz_generate_parity(raidz_map_t *rm) +{ + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + vdev_raidz_generate_parity_row(rr); + } +} + /* ARGSUSED */ static int vdev_raidz_reconst_p_func(void *dbuf, void *sbuf, size_t size, void *private) @@ -809,30 +1108,31 @@ vdev_raidz_reconst_pq_tail_func(void *xbuf, size_t size, void *private) } static int -vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) +vdev_raidz_reconstruct_p(raidz_row_t *rr, int *tgts, int ntgts) { int x = tgts[0]; - int c; abd_t *dst, *src; - ASSERT(ntgts == 1); - ASSERT(x >= rm->rm_firstdatacol); - ASSERT(x < rm->rm_cols); + zfs_dbgmsg("reconstruct_p(rm=%p x=%u)", + rr, x); - ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_P].rc_size); - ASSERT(rm->rm_col[x].rc_size > 0); + ASSERT3U(ntgts, ==, 1); + ASSERT3U(x, >=, rr->rr_firstdatacol); + ASSERT3U(x, <, rr->rr_cols); - src = rm->rm_col[VDEV_RAIDZ_P].rc_abd; - dst = rm->rm_col[x].rc_abd; + ASSERT3U(rr->rr_col[x].rc_size, <=, rr->rr_col[VDEV_RAIDZ_P].rc_size); - abd_copy_from_buf(dst, abd_to_buf(src), rm->rm_col[x].rc_size); + src = rr->rr_col[VDEV_RAIDZ_P].rc_abd; + dst = rr->rr_col[x].rc_abd; - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - uint64_t size = MIN(rm->rm_col[x].rc_size, - rm->rm_col[c].rc_size); + abd_copy_from_buf(dst, abd_to_buf(src), rr->rr_col[x].rc_size); - src = rm->rm_col[c].rc_abd; - dst = rm->rm_col[x].rc_abd; + for (int 
c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + uint64_t size = MIN(rr->rr_col[x].rc_size, + rr->rr_col[c].rc_size); + + src = rr->rr_col[c].rc_abd; + dst = rr->rr_col[x].rc_abd; /* XXX not needed, done above */ if (c == x) continue; @@ -845,52 +1145,55 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) } static int -vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) +vdev_raidz_reconstruct_q(raidz_row_t *rr, int *tgts, int ntgts) { int x = tgts[0]; int c, exp; abd_t *dst, *src; + zfs_dbgmsg("reconstruct_q(rm=%p x=%u)", + rr, x); + ASSERT(ntgts == 1); - ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_Q].rc_size); + ASSERT(rr->rr_col[x].rc_size <= rr->rr_col[VDEV_RAIDZ_Q].rc_size); - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - uint64_t size = (c == x) ? 0 : MIN(rm->rm_col[x].rc_size, - rm->rm_col[c].rc_size); + for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + uint64_t size = (c == x) ? 0 : MIN(rr->rr_col[x].rc_size, + rr->rr_col[c].rc_size); - src = rm->rm_col[c].rc_abd; - dst = rm->rm_col[x].rc_abd; + src = rr->rr_col[c].rc_abd; + dst = rr->rr_col[x].rc_abd; - if (c == rm->rm_firstdatacol) { + if (c == rr->rr_firstdatacol) { abd_copy(dst, src, size); - if (rm->rm_col[x].rc_size > size) + if (rr->rr_col[x].rc_size > size) { abd_zero_off(dst, size, - rm->rm_col[x].rc_size - size); - + rr->rr_col[x].rc_size - size); + } } else { - ASSERT3U(size, <=, rm->rm_col[x].rc_size); + ASSERT3U(size, <=, rr->rr_col[x].rc_size); (void) abd_iterate_func2(dst, src, 0, 0, size, vdev_raidz_reconst_q_pre_func, NULL); (void) abd_iterate_func(dst, - size, rm->rm_col[x].rc_size - size, + size, rr->rr_col[x].rc_size - size, vdev_raidz_reconst_q_pre_tail_func, NULL); } } - src = rm->rm_col[VDEV_RAIDZ_Q].rc_abd; - dst = rm->rm_col[x].rc_abd; - exp = 255 - (rm->rm_cols - 1 - x); + src = rr->rr_col[VDEV_RAIDZ_Q].rc_abd; + dst = rr->rr_col[x].rc_abd; + exp = 255 - (rr->rr_cols - 1 - x); struct reconst_q_struct rq = { abd_to_buf(src), exp 
}; - (void) abd_iterate_func(dst, 0, rm->rm_col[x].rc_size, + (void) abd_iterate_func(dst, 0, rr->rr_col[x].rc_size, vdev_raidz_reconst_q_post_func, &rq); return (1 << VDEV_RAIDZ_Q); } static int -vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) +vdev_raidz_reconstruct_pq(raidz_row_t *rr, int *tgts, int ntgts) { uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp; abd_t *pdata, *qdata; @@ -899,12 +1202,15 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) int y = tgts[1]; abd_t *xd, *yd; + zfs_dbgmsg("reconstruct_pq(rm=%p x=%u y=%u)", + rr, x, y); + ASSERT(ntgts == 2); ASSERT(x < y); - ASSERT(x >= rm->rm_firstdatacol); - ASSERT(y < rm->rm_cols); + ASSERT(x >= rr->rr_firstdatacol); + ASSERT(y < rr->rr_cols); - ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size); + ASSERT(rr->rr_col[x].rc_size >= rr->rr_col[y].rc_size); /* * Move the parity data aside -- we're going to compute parity as @@ -913,29 +1219,29 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) * parity so we make those columns appear to be full of zeros by * setting their lengths to zero. 
*/ - pdata = rm->rm_col[VDEV_RAIDZ_P].rc_abd; - qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_abd; - xsize = rm->rm_col[x].rc_size; - ysize = rm->rm_col[y].rc_size; + pdata = rr->rr_col[VDEV_RAIDZ_P].rc_abd; + qdata = rr->rr_col[VDEV_RAIDZ_Q].rc_abd; + xsize = rr->rr_col[x].rc_size; + ysize = rr->rr_col[y].rc_size; - rm->rm_col[VDEV_RAIDZ_P].rc_abd = - abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_P].rc_size, B_TRUE); - rm->rm_col[VDEV_RAIDZ_Q].rc_abd = - abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_Q].rc_size, B_TRUE); - rm->rm_col[x].rc_size = 0; - rm->rm_col[y].rc_size = 0; + rr->rr_col[VDEV_RAIDZ_P].rc_abd = + abd_alloc_linear(rr->rr_col[VDEV_RAIDZ_P].rc_size, B_TRUE); + rr->rr_col[VDEV_RAIDZ_Q].rc_abd = + abd_alloc_linear(rr->rr_col[VDEV_RAIDZ_Q].rc_size, B_TRUE); + rr->rr_col[x].rc_size = 0; + rr->rr_col[y].rc_size = 0; - vdev_raidz_generate_parity_pq(rm); + vdev_raidz_generate_parity_pq(rr); - rm->rm_col[x].rc_size = xsize; - rm->rm_col[y].rc_size = ysize; + rr->rr_col[x].rc_size = xsize; + rr->rr_col[y].rc_size = ysize; p = abd_to_buf(pdata); q = abd_to_buf(qdata); - pxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - qxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); - xd = rm->rm_col[x].rc_abd; - yd = rm->rm_col[y].rc_abd; + pxy = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd); + qxy = abd_to_buf(rr->rr_col[VDEV_RAIDZ_Q].rc_abd); + xd = rr->rr_col[x].rc_abd; + yd = rr->rr_col[y].rc_abd; /* * We now have: @@ -953,7 +1259,7 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) */ a = vdev_raidz_pow2[255 + x - y]; - b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)]; + b = vdev_raidz_pow2[255 - (rr->rr_cols - 1 - x)]; tmp = 255 - vdev_raidz_log2[a ^ 1]; aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; @@ -967,14 +1273,14 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) (void) abd_iterate_func(xd, ysize, xsize - ysize, vdev_raidz_reconst_pq_tail_func, &rpq); - abd_free(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - abd_free(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); + 
abd_free(rr->rr_col[VDEV_RAIDZ_P].rc_abd); + abd_free(rr->rr_col[VDEV_RAIDZ_Q].rc_abd); /* * Restore the saved parity data. */ - rm->rm_col[VDEV_RAIDZ_P].rc_abd = pdata; - rm->rm_col[VDEV_RAIDZ_Q].rc_abd = qdata; + rr->rr_col[VDEV_RAIDZ_P].rc_abd = pdata; + rr->rr_col[VDEV_RAIDZ_Q].rc_abd = qdata; return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q)); } @@ -1134,13 +1440,13 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) /* END CSTYLED */ static void -vdev_raidz_matrix_init(raidz_map_t *rm, int n, int nmap, int *map, +vdev_raidz_matrix_init(raidz_row_t *rr, int n, int nmap, int *map, uint8_t **rows) { int i, j; int pow; - ASSERT(n == rm->rm_cols - rm->rm_firstdatacol); + ASSERT(n == rr->rr_cols - rr->rr_firstdatacol); /* * Fill in the missing rows of interest. @@ -1164,7 +1470,7 @@ vdev_raidz_matrix_init(raidz_map_t *rm, int n, int nmap, int *map, } static void -vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, +vdev_raidz_matrix_invert(raidz_row_t *rr, int n, int nmissing, int *missing, uint8_t **rows, uint8_t **invrows, const uint8_t *used) { int i, j, ii, jj; @@ -1176,10 +1482,10 @@ vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, * correspond to data columns. 
*/ for (i = 0; i < nmissing; i++) { - ASSERT3S(used[i], <, rm->rm_firstdatacol); + ASSERT3S(used[i], <, rr->rr_firstdatacol); } for (; i < n; i++) { - ASSERT3S(used[i], >=, rm->rm_firstdatacol); + ASSERT3S(used[i], >=, rr->rr_firstdatacol); } /* @@ -1196,8 +1502,8 @@ vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, */ for (i = 0; i < nmissing; i++) { for (j = nmissing; j < n; j++) { - ASSERT3U(used[j], >=, rm->rm_firstdatacol); - jj = used[j] - rm->rm_firstdatacol; + ASSERT3U(used[j], >=, rr->rr_firstdatacol); + jj = used[j] - rr->rr_firstdatacol; ASSERT3S(jj, <, n); invrows[i][j] = rows[i][jj]; rows[i][jj] = 0; @@ -1258,7 +1564,7 @@ vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, } static void -vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, +vdev_raidz_matrix_reconstruct(raidz_row_t *rr, int n, int nmissing, int *missing, uint8_t **invrows, const uint8_t *used) { int i, j, x, cc, c; @@ -1290,22 +1596,24 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, for (i = 0; i < n; i++) { c = used[i]; - ASSERT3U(c, <, rm->rm_cols); + ASSERT3U(c, <, rr->rr_cols); - src = abd_to_buf(rm->rm_col[c].rc_abd); - ccount = rm->rm_col[c].rc_size; + ccount = rr->rr_col[c].rc_size; + ASSERT(ccount >= rr->rr_col[missing[0]].rc_size || i > 0); + if (ccount == 0) + continue; + src = abd_to_buf(rr->rr_col[c].rc_abd); for (j = 0; j < nmissing; j++) { - cc = missing[j] + rm->rm_firstdatacol; - ASSERT3U(cc, >=, rm->rm_firstdatacol); - ASSERT3U(cc, <, rm->rm_cols); + cc = missing[j] + rr->rr_firstdatacol; + ASSERT3U(cc, >=, rr->rr_firstdatacol); + ASSERT3U(cc, <, rr->rr_cols); ASSERT3U(cc, !=, c); - dst[j] = abd_to_buf(rm->rm_col[cc].rc_abd); - dcount[j] = rm->rm_col[cc].rc_size; + dcount[j] = rr->rr_col[cc].rc_size; + if (dcount[j] != 0) + dst[j] = abd_to_buf(rr->rr_col[cc].rc_abd); } - ASSERT(ccount >= rm->rm_col[missing[0]].rc_size || i > 0); - for (x = 0; x < ccount; x++, src++) { if (*src != 
0) log = vdev_raidz_log2[*src]; @@ -1334,13 +1642,15 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, } static int -vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) +vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts) { int n, i, c, t, tt; int nmissing_rows; int missing_rows[VDEV_RAIDZ_MAXPARITY]; int parity_map[VDEV_RAIDZ_MAXPARITY]; + zfs_dbgmsg("reconstruct_general(rm=%p ntgts=%u)", + rr, ntgts); uint8_t *p, *pp; size_t psize; @@ -1356,28 +1666,31 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) * Matrix reconstruction can't use scatter ABDs yet, so we allocate * temporary linear ABDs. */ - if (!abd_is_linear(rm->rm_col[rm->rm_firstdatacol].rc_abd)) { - bufs = kmem_alloc(rm->rm_cols * sizeof (abd_t *), KM_PUSHPAGE); + if (!abd_is_linear(rr->rr_col[rr->rr_firstdatacol].rc_abd)) { + bufs = kmem_alloc(rr->rr_cols * sizeof (abd_t *), KM_PUSHPAGE); - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - raidz_col_t *col = &rm->rm_col[c]; + for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + raidz_col_t *col = &rr->rr_col[c]; bufs[c] = col->rc_abd; - col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE); - abd_copy(col->rc_abd, bufs[c], col->rc_size); + if (bufs[c] != NULL) { + col->rc_abd = + abd_alloc_linear(col->rc_size, B_TRUE); + abd_copy(col->rc_abd, bufs[c], col->rc_size); + } } } - n = rm->rm_cols - rm->rm_firstdatacol; + n = rr->rr_cols - rr->rr_firstdatacol; /* * Figure out which data columns are missing. 
*/ nmissing_rows = 0; for (t = 0; t < ntgts; t++) { - if (tgts[t] >= rm->rm_firstdatacol) { + if (tgts[t] >= rr->rr_firstdatacol) { missing_rows[nmissing_rows++] = - tgts[t] - rm->rm_firstdatacol; + tgts[t] - rr->rr_firstdatacol; } } @@ -1387,7 +1700,7 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) */ for (tt = 0, c = 0, i = 0; i < nmissing_rows; c++) { ASSERT(tt < ntgts); - ASSERT(c < rm->rm_firstdatacol); + ASSERT(c < rr->rr_firstdatacol); /* * Skip any targeted parity columns. @@ -1422,9 +1735,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) used[i] = parity_map[i]; } - for (tt = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + for (tt = 0, c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { if (tt < nmissing_rows && - c == missing_rows[tt] + rm->rm_firstdatacol) { + c == missing_rows[tt] + rr->rr_firstdatacol) { tt++; continue; } @@ -1437,18 +1750,18 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) /* * Initialize the interesting rows of the matrix. */ - vdev_raidz_matrix_init(rm, n, nmissing_rows, parity_map, rows); + vdev_raidz_matrix_init(rr, n, nmissing_rows, parity_map, rows); /* * Invert the matrix. */ - vdev_raidz_matrix_invert(rm, n, nmissing_rows, missing_rows, rows, + vdev_raidz_matrix_invert(rr, n, nmissing_rows, missing_rows, rows, invrows, used); /* * Reconstruct the missing data using the generated matrix. 
*/ - vdev_raidz_matrix_reconstruct(rm, n, nmissing_rows, missing_rows, + vdev_raidz_matrix_reconstruct(rr, n, nmissing_rows, missing_rows, invrows, used); kmem_free(p, psize); @@ -1457,49 +1770,64 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) * copy back from temporary linear abds and free them */ if (bufs) { - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - raidz_col_t *col = &rm->rm_col[c]; + for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + raidz_col_t *col = &rr->rr_col[c]; - abd_copy(bufs[c], col->rc_abd, col->rc_size); - abd_free(col->rc_abd); + if (bufs[c] != NULL) { + abd_copy(bufs[c], col->rc_abd, col->rc_size); + abd_free(col->rc_abd); + } col->rc_abd = bufs[c]; } - kmem_free(bufs, rm->rm_cols * sizeof (abd_t *)); + kmem_free(bufs, rr->rr_cols * sizeof (abd_t *)); } return (code); } int -vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt) +vdev_raidz_reconstruct_row(raidz_row_t *rr, const int *t, int nt) { int tgts[VDEV_RAIDZ_MAXPARITY], *dt; int ntgts; - int i, c, ret; + int i, c; int code; int nbadparity, nbaddata; int parity_valid[VDEV_RAIDZ_MAXPARITY]; + zfs_dbgmsg("reconstruct(rm=%p nt=%u cols=%u md=%u mp=%u)", + rr, nt, (int)rr->rr_cols, (int)rr->rr_missingdata, + (int)rr->rr_missingparity); + /* * The tgts list must already be sorted. 
*/ + zfs_dbgmsg("reconstruct(rm=%p t[%u]=%u)", rr, 0, t[0]); for (i = 1; i < nt; i++) { + zfs_dbgmsg("reconstruct(rm=%p t[%u]=%u)", + rr, i, t[i]); ASSERT(t[i] > t[i - 1]); } - nbadparity = rm->rm_firstdatacol; - nbaddata = rm->rm_cols - nbadparity; + nbadparity = rr->rr_firstdatacol; + nbaddata = rr->rr_cols - nbadparity; ntgts = 0; - for (i = 0, c = 0; c < rm->rm_cols; c++) { - if (c < rm->rm_firstdatacol) + for (i = 0, c = 0; c < rr->rr_cols; c++) { + zfs_dbgmsg("reconstruct(rm=%p col=%u devid=%u " + "offset=%llx error=%u)", + rr, c, + (int)rr->rr_col[c].rc_devidx, + (long long)rr->rr_col[c].rc_offset, + (int)rr->rr_col[c].rc_error); + if (c < rr->rr_firstdatacol) parity_valid[c] = B_FALSE; if (i < nt && c == t[i]) { tgts[ntgts++] = c; i++; - } else if (rm->rm_col[c].rc_error != 0) { + } else if (rr->rr_col[c].rc_error != 0) { tgts[ntgts++] = c; - } else if (c >= rm->rm_firstdatacol) { + } else if (c >= rr->rr_firstdatacol) { nbaddata--; } else { parity_valid[c] = B_TRUE; @@ -1513,10 +1841,12 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt) dt = &tgts[nbadparity]; +#if 0 /* Reconstruct using the new math implementation */ - ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata); + int ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata); if (ret != RAIDZ_ORIGINAL_IMPL) return (ret); +#endif /* * See if we can use any of our optimized reconstruction routines. 
@@ -1524,29 +1854,29 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt) switch (nbaddata) { case 1: if (parity_valid[VDEV_RAIDZ_P]) - return (vdev_raidz_reconstruct_p(rm, dt, 1)); + return (vdev_raidz_reconstruct_p(rr, dt, 1)); - ASSERT(rm->rm_firstdatacol > 1); + ASSERT(rr->rr_firstdatacol > 1); if (parity_valid[VDEV_RAIDZ_Q]) - return (vdev_raidz_reconstruct_q(rm, dt, 1)); + return (vdev_raidz_reconstruct_q(rr, dt, 1)); - ASSERT(rm->rm_firstdatacol > 2); + ASSERT(rr->rr_firstdatacol > 2); break; case 2: - ASSERT(rm->rm_firstdatacol > 1); + ASSERT(rr->rr_firstdatacol > 1); if (parity_valid[VDEV_RAIDZ_P] && parity_valid[VDEV_RAIDZ_Q]) - return (vdev_raidz_reconstruct_pq(rm, dt, 2)); + return (vdev_raidz_reconstruct_pq(rr, dt, 2)); - ASSERT(rm->rm_firstdatacol > 2); + ASSERT(rr->rr_firstdatacol > 2); break; } - code = vdev_raidz_reconstruct_general(rm, tgts, ntgts); + code = vdev_raidz_reconstruct_general(rr, tgts, ntgts); ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY)); ASSERT(code > 0); return (code); @@ -1556,8 +1886,8 @@ static int vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, uint64_t *ashift) { - vdev_t *cvd; - uint64_t nparity = vd->vdev_nparity; + vdev_raidz_t *vdrz = vd->vdev_tsd; + uint64_t nparity = vdrz->vd_nparity; int c; int lasterror = 0; int numerrors = 0; @@ -1573,7 +1903,7 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, vdev_open_children(vd); for (c = 0; c < vd->vdev_children; c++) { - cvd = vd->vdev_child[c]; + vdev_t *cvd = vd->vdev_child[c]; if (cvd->vdev_open_error != 0) { lasterror = cvd->vdev_open_error; @@ -1609,10 +1939,11 @@ vdev_raidz_close(vdev_t *vd) static uint64_t vdev_raidz_asize(vdev_t *vd, uint64_t psize) { + vdev_raidz_t *vdrz = vd->vdev_tsd; uint64_t asize; uint64_t ashift = vd->vdev_top->vdev_ashift; - uint64_t cols = vd->vdev_children; - uint64_t nparity = vd->vdev_nparity; + uint64_t cols = vdrz->vd_logical_width; + uint64_t nparity = vdrz->vd_nparity; asize = ((psize - 1) >> 
ashift) + 1; asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity)); @@ -1631,8 +1962,9 @@ vdev_raidz_child_done(zio_t *zio) rc->rc_skipped = 0; } +#if 0 static void -vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, int col) +vdev_raidz_io_verify(zio_t *zio, raidz_row_t *rr, int col) { #ifdef ZFS_DEBUG vdev_t *vd = zio->io_vd; @@ -1643,7 +1975,7 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, int col) logical_rs.rs_end = logical_rs.rs_start + vdev_raidz_asize(zio->io_vd, zio->io_size); - raidz_col_t *rc = &rm->rm_col[col]; + raidz_col_t *rc = &rr->rr_col[col]; vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; vdev_xlate(cvd, &logical_rs, &physical_rs); @@ -1663,107 +1995,86 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, int col) } #endif } +#endif -/* - * Start an IO operation on a RAIDZ VDev - * - * Outline: - * - For write operations: - * 1. Generate the parity data - * 2. Create child zio write operations to each column's vdev, for both - * data and parity. - * 3. If the column skips any sectors for padding, create optional dummy - * write zio children for those areas to improve aggregation continuity. - * - For read operations: - * 1. Create child zio read operations to each data column's vdev to read - * the range of data required for zio. - * 2. If this is a scrub or resilver operation, or if any of the data - * vdevs have had errors, then create zio read operations to the parity - * columns' VDevs as well. 
- */ static void -vdev_raidz_io_start(zio_t *zio) +vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr) { vdev_t *vd = zio->io_vd; - vdev_t *tvd = vd->vdev_top; - vdev_t *cvd; - raidz_map_t *rm; - raidz_col_t *rc; - int c, i; - - rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children, - vd->vdev_nparity); - - ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size)); - if (zio->io_type == ZIO_TYPE_WRITE) { - vdev_raidz_generate_parity(rm); - - for (c = 0; c < rm->rm_cols; c++) { - rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_devidx]; - - /* - * Verify physical to logical translation. - */ - vdev_raidz_io_verify(zio, rm, c); + vdev_raidz_generate_parity_row(rr); - zio_nowait(zio_vdev_child_io(zio, NULL, cvd, - rc->rc_offset, rc->rc_abd, rc->rc_size, - zio->io_type, zio->io_priority, 0, - vdev_raidz_child_done, rc)); - } + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_size == 0) + continue; + vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; - /* - * Generate optional I/Os for any skipped sectors to improve - * aggregation contiguity. - */ - for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) { - ASSERT(c <= rm->rm_scols); - if (c == rm->rm_scols) - c = 0; - rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_devidx]; - zio_nowait(zio_vdev_child_io(zio, NULL, cvd, - rc->rc_offset + rc->rc_size, NULL, - 1 << tvd->vdev_ashift, - zio->io_type, zio->io_priority, - ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL)); - } + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset, rc->rc_abd, rc->rc_size, + zio->io_type, zio->io_priority, 0, + vdev_raidz_child_done, rc)); + } - zio_execute(zio); - return; + /* + * XXX do this in vdev_raidz_io_start, based on nskip stored in rm + */ +#if 0 + /* + * Generate optional I/Os for any skipped sectors to improve + * aggregation contiguity. 
+ */ + vdev_t *tvd = vd->vdev_top; + for (int c = rr->rm_skipstart, i = 0; i < rr->rm_nskip; c++, i++) { + ASSERT(c <= rr->rm_scols); + if (c == rr->rm_scols) + c = 0; + raidz_col_t *rc = &rr->rr_col[c]; + vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset + rc->rc_size, NULL, + 1 << tvd->vdev_ashift, + zio->io_type, zio->io_priority, + ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL)); } +#endif +} - ASSERT(zio->io_type == ZIO_TYPE_READ); +static void +vdev_raidz_io_start_read(zio_t *zio, raidz_row_t *rr, boolean_t forceparity) +{ + vdev_t *vd = zio->io_vd; /* * Iterate over the columns in reverse order so that we hit the parity * last -- any errors along the way will force us to read the parity. */ - for (c = rm->rm_cols - 1; c >= 0; c--) { - rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_devidx]; + for (int c = rr->rr_cols - 1; c >= 0; c--) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_size == 0) + continue; + vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; if (!vdev_readable(cvd)) { - if (c >= rm->rm_firstdatacol) - rm->rm_missingdata++; + if (c >= rr->rr_firstdatacol) + rr->rr_missingdata++; else - rm->rm_missingparity++; + rr->rr_missingparity++; rc->rc_error = SET_ERROR(ENXIO); rc->rc_tried = 1; /* don't even try */ rc->rc_skipped = 1; continue; } if (vdev_dtl_contains(cvd, DTL_MISSING, zio->io_txg, 1)) { - if (c >= rm->rm_firstdatacol) - rm->rm_missingdata++; + if (c >= rr->rr_firstdatacol) + rr->rr_missingdata++; else - rm->rm_missingparity++; + rr->rr_missingparity++; rc->rc_error = SET_ERROR(ESTALE); rc->rc_skipped = 1; continue; } - if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 || + if (forceparity || + c >= rr->rr_firstdatacol || rr->rr_missingdata > 0 || (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) { zio_nowait(zio_vdev_child_io(zio, NULL, cvd, rc->rc_offset, rc->rc_abd, rc->rc_size, @@ -1771,6 +2082,77 @@ vdev_raidz_io_start(zio_t *zio) vdev_raidz_child_done, rc)); } } 
+} + +/* + * Start an IO operation on a RAIDZ VDev + * + * Outline: + * - For write operations: + * 1. Generate the parity data + * 2. Create child zio write operations to each column's vdev, for both + * data and parity. + * 3. If the column skips any sectors for padding, create optional dummy + * write zio children for those areas to improve aggregation continuity. + * - For read operations: + * 1. Create child zio read operations to each data column's vdev to read + * the range of data required for zio. + * 2. If this is a scrub or resilver operation, or if any of the data + * vdevs have had errors, then create zio read operations to the parity + * columns' VDevs as well. + */ +static void +vdev_raidz_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_t *tvd = vd->vdev_top; + vdev_raidz_t *vdrz = vd->vdev_tsd; + raidz_map_t *rm; + + if (vdrz->vd_logical_width != vdrz->vd_physical_width) { + /* XXX rangelock not needed after expansion completes */ + locked_range_t *lr = + rangelock_enter(&vdrz->vn_vre.vre_rangelock, + zio->io_offset, zio->io_size, RL_READER); + + rm = vdev_raidz_map_alloc_expanded(zio->io_abd, + zio->io_size, zio->io_offset, + tvd->vdev_ashift, vdrz->vd_physical_width, + vdrz->vd_logical_width, vdrz->vd_nparity, + vdrz->vn_vre.vre_offset_phys); + rm->rm_lr = lr; + /* + * XXX If this is a write, will need to do additional + * writes to locations that are already copied, but + * not yet reflected in the on-disk format. + */ + } else { + rm = vdev_raidz_map_alloc(zio, + tvd->vdev_ashift, vdrz->vd_logical_width, + vdrz->vd_nparity); + } + + zio->io_vsd = rm; + zio->io_vsd_ops = &vdev_raidz_vsd_ops; + if (zio->io_type == ZIO_TYPE_WRITE) { + for (int i = 0; i < rm->rm_nrows; i++) { + vdev_raidz_io_start_write(zio, + rm->rm_row[i]); + } + } else { + ASSERT(zio->io_type == ZIO_TYPE_READ); + /* + * If there are multiple rows, we will be hitting + * all disks, so go ahead and read the parity so + * that we are reading in decent size chunks. 
+ * XXX maybe doesn't really matter? + */ + boolean_t forceparity = rm->rm_nrows > 1; + for (int i = 0; i < rm->rm_nrows; i++) { + vdev_raidz_io_start_read(zio, + rm->rm_row[i], forceparity); + } + } zio_execute(zio); } @@ -1824,10 +2206,10 @@ raidz_checksum_verify(zio_t *zio) * Generate the parity from the data columns. If we tried and were able to * read the parity without error, verify that the generated parity matches the * data we read. If it doesn't, we fire off a checksum error. Return the - * number such failures. + * number of such failures. */ static int -raidz_parity_verify(zio_t *zio, raidz_map_t *rm) +raidz_parity_verify(zio_t *zio, raidz_row_t *rr) { abd_t *orig[VDEV_RAIDZ_MAXPARITY]; int c, ret = 0; @@ -1840,8 +2222,8 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) if (checksum == ZIO_CHECKSUM_NOPARITY) return (ret); - for (c = 0; c < rm->rm_firstdatacol; c++) { - rc = &rm->rm_col[c]; + for (c = 0; c < rr->rr_firstdatacol; c++) { + rc = &rr->rr_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; @@ -1849,13 +2231,23 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) abd_copy(orig[c], rc->rc_abd, rc->rc_size); } - vdev_raidz_generate_parity(rm); + /* + * XXX regenerates parity even for !tried||rc_error!=0 + * This could cause a side effect of fixing stuff we didn't realize + * was necessary (i.e. 
even if we return 0) + */ + vdev_raidz_generate_parity_row(rr); + + for (c = 0; c < rr->rr_firstdatacol; c++) { + rc = &rr->rr_col[c]; - for (c = 0; c < rm->rm_firstdatacol; c++) { - rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; + if (abd_cmp(orig[c], rc->rc_abd) != 0) { + zfs_dbgmsg("raidz_parity_verify found error on " + "col=%u devidx=%u", + c, (int)rc->rc_devidx); raidz_checksum_error(zio, rc, orig[c]); rc->rc_error = SET_ERROR(ECKSUM); ret++; @@ -1867,464 +2259,668 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) } static int -vdev_raidz_worst_error(raidz_map_t *rm) +vdev_raidz_worst_error(raidz_row_t *rr) { int error = 0; - for (int c = 0; c < rm->rm_cols; c++) - error = zio_worst_error(error, rm->rm_col[c].rc_error); + for (int c = 0; c < rr->rr_cols; c++) + error = zio_worst_error(error, rr->rr_col[c].rc_error); return (error); } -/* - * Iterate over all combinations of bad data and attempt a reconstruction. - * Note that the algorithm below is non-optimal because it doesn't take into - * account how reconstruction is actually performed. For example, with - * triple-parity RAID-Z the reconstruction procedure is the same if column 4 - * is targeted as invalid as if columns 1 and 4 are targeted since in both - * cases we'd only use parity information in column 0. - */ -static int -vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) +static void +vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr) { - raidz_map_t *rm = zio->io_vsd; - raidz_col_t *rc; - abd_t *orig[VDEV_RAIDZ_MAXPARITY]; - int tstore[VDEV_RAIDZ_MAXPARITY + 2]; - int *tgts = &tstore[1]; - int curr, next, i, c, n; - int code, ret = 0; - - ASSERT(total_errors < rm->rm_firstdatacol); - - /* - * This simplifies one edge condition. 
- */ - tgts[-1] = -1; + int unexpected_errors = 0; + int parity_errors = 0; + int parity_untried = 0; + int data_errors = 0; - for (n = 1; n <= rm->rm_firstdatacol - total_errors; n++) { - /* - * Initialize the targets array by finding the first n columns - * that contain no error. - * - * If there were no data errors, we need to ensure that we're - * always explicitly attempting to reconstruct at least one - * data column. To do this, we simply push the highest target - * up into the data columns. - */ - for (c = 0, i = 0; i < n; i++) { - if (i == n - 1 && data_errors == 0 && - c < rm->rm_firstdatacol) { - c = rm->rm_firstdatacol; - } + ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); - while (rm->rm_col[c].rc_error != 0) { - c++; - ASSERT3S(c, <, rm->rm_cols); - } + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + + if (rc->rc_error) { + if (c < rr->rr_firstdatacol) + parity_errors++; + else + data_errors++; - tgts[i] = c++; + if (!rc->rc_skipped) + unexpected_errors++; + } else if (c < rr->rr_firstdatacol && !rc->rc_tried) { + parity_untried++; } + } - /* - * Setting tgts[n] simplifies the other edge condition. - */ - tgts[n] = rm->rm_cols; + /* + * If we read more parity disks than were used for + * reconstruction, confirm that the other parity disks produced + * correct data. + * + * Note that we also regenerate parity when resilvering so we + * can write it out to failed devices later. + */ + zfs_dbgmsg("parity_errors=%u parity_untried=%u data_errors=%u " + "verifying=%s", + parity_errors, parity_untried, data_errors, + (parity_errors + parity_untried < + rr->rr_firstdatacol - data_errors) ? 
"yes" : "no"); + if (parity_errors + parity_untried < + rr->rr_firstdatacol - data_errors || + (zio->io_flags & ZIO_FLAG_RESILVER)) { + int n = raidz_parity_verify(zio, rr); + unexpected_errors += n; + ASSERT3U(parity_errors + n, <=, rr->rr_firstdatacol); + } + if (zio->io_error == 0 && spa_writeable(zio->io_spa) && + (unexpected_errors > 0 || (zio->io_flags & ZIO_FLAG_RESILVER))) { /* - * These buffers were allocated in previous iterations. + * Use the good data we have in hand to repair damaged children. */ - for (i = 0; i < n - 1; i++) { - ASSERT(orig[i] != NULL); + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + vdev_t *vd = zio->io_vd; + vdev_t *cvd = vd->vdev_child[rc->rc_devidx]; + + if (rc->rc_error == 0 || rc->rc_size == 0) + continue; + + zio_nowait(zio_vdev_child_io(zio, NULL, cvd, + rc->rc_offset, rc->rc_abd, rc->rc_size, + ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, + ZIO_FLAG_IO_REPAIR | (unexpected_errors ? + ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); } + } +} - orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd, - rm->rm_col[0].rc_size); +/* + * Iterate over all combinations of bad data and attempt a reconstruction. + * Note that the algorithm below is non-optimal because it doesn't take into + * account how reconstruction is actually performed. For example, with + * triple-parity RAID-Z the reconstruction procedure is the same if column 4 + * is targeted as invalid as if columns 1 and 4 are targeted since in both + * cases we'd only use parity information in column 0. 
+ * + * The order that we find the various possible combinations of failed + * disks is dictated by these rules: + * - Examine each "slot" (the "i" in tgts[i]) + * - Try to increment this slot (tgts[i] = tgts[i] + 1) + * - if we can't increment because it runs into the next slot, + * reset our slot to the minimum, and examine the next slot + * For example, with a 6-wide RAIDZ3, and no known errors (so we have to choose + * 3 columns to reconstruct), we will generate the following sequence: + * + * STATE ACTION + * 0 1 2 special case: skip since these are all parity + * 0 1 3 first slot: reset to 0; middle slot: increment to 2 + * 0 2 3 first slot: increment to 1 + * 1 2 3 first: reset to 0; middle: reset to 1; last: increment to 4 + * 0 1 4 first: reset to 0; middle: increment to 2 + * 0 2 4 first: increment to 1 + * 1 2 4 first: reset to 0; middle: increment to 3 + * 0 3 4 first: increment to 1 + * 1 3 4 first: increment to 2 + * 2 3 4 first: reset to 0; middle: reset to 1; last: increment to 5 + * 0 1 5 first: reset to 0; middle: increment to 2 + * 0 2 5 first: increment to 1 + * 1 2 5 first: reset to 0; middle: increment to 3 + * 0 3 5 first: increment to 1 + * 1 3 5 first: increment to 2 + * 2 3 5 first: reset to 0; middle: increment to 4 + * 0 4 5 first: increment to 1 + * 1 4 5 first: increment to 2 + * 2 4 5 first: increment to 3 + * 3 4 5 done + */ - curr = 0; - next = tgts[curr]; +/* + * Should this sector be considered failed for logical child ID i? 
+ * XXX comment explaining logical child ID's + */ +static boolean_t +raidz_simulate_failure(vdev_raidz_t *vdrz, int ashift, int i, raidz_col_t *rc) +{ + uint64_t sector_id = + vdrz->vd_physical_width * (rc->rc_offset >> ashift) + + rc->rc_devidx; + +#if 0 + zfs_dbgmsg("raidz_simulate_failure(pw=%u lw=%u ashift=%u i=%u " + "rc_offset=%llx rc_devidx=%u sector_id=%u", + vdrz->vd_physical_width, + vdrz->vd_logical_width, + ashift, + i, + (long long)rc->rc_offset, + (int)rc->rc_devidx, + (long long)sector_id); +#endif - while (curr != n) { - tgts[curr] = next; - curr = 0; + for (int w = vdrz->vd_physical_width; + w >= vdrz->vd_logical_width; w--) { + if (i < w) { + return (sector_id % w == i); + } else { + i -= w; + } + } + ASSERT(!"invalid logical child id"); + return (B_FALSE); +} - /* - * Save off the original data that we're going to - * attempt to reconstruct. - */ - for (i = 0; i < n; i++) { - ASSERT(orig[i] != NULL); - c = tgts[i]; - ASSERT3S(c, >=, 0); - ASSERT3S(c, <, rm->rm_cols); - rc = &rm->rm_col[c]; - abd_copy(orig[i], rc->rc_abd, rc->rc_size); +static void +raidz_restore_orig_data(raidz_map_t *rm) +{ + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_need_orig_restore) { + abd_copy_from_buf(rc->rc_abd, + rc->rc_orig_data, rc->rc_size); + rc->rc_need_orig_restore = B_FALSE; } + } + } +} - /* - * Attempt a reconstruction and exit the outer loop on - * success. 
- */ - code = vdev_raidz_reconstruct(rm, tgts, n); - if (raidz_checksum_verify(zio) == 0) { - - for (i = 0; i < n; i++) { - c = tgts[i]; - rc = &rm->rm_col[c]; - ASSERT(rc->rc_error == 0); - if (rc->rc_tried) - raidz_checksum_error(zio, rc, - orig[i]); - rc->rc_error = SET_ERROR(ECKSUM); +/* + * returns EINVAL if reconstruction of the block will not be possible + * returns ECKSUM if this specific reconstruction failed + * returns 0 on successful reconstruction + */ +static int +raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts) +{ + raidz_map_t *rm = zio->io_vsd; + vdev_raidz_t *vdrz = zio->io_vd->vdev_tsd; + + zfs_dbgmsg("raidz_reconstruct_expanded(zio=%p ltgts=%u,%u,%u ntgts=%u", + zio, ltgts[0], ltgts[1], ltgts[2], ntgts); + + /* Reconstruct each row */ + for (int r = 0; r < rm->rm_nrows; r++) { + raidz_row_t *rr = rm->rm_row[r]; + int my_tgts[VDEV_RAIDZ_MAXPARITY]; /* value is child id */ + int t = 0; + int dead = 0; + int dead_data = 0; + + zfs_dbgmsg("raidz_reconstruct_expanded(row=%u)", + r); + + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + ASSERT0(rc->rc_need_orig_restore); + if (rc->rc_error != 0) { + dead++; + if (c >= vdrz->vd_nparity) + dead_data++; + continue; + } + if (rc->rc_size == 0) + continue; + for (int lt = 0; lt < ntgts; lt++) { + if (raidz_simulate_failure(vdrz, + zio->io_vd->vdev_top->vdev_ashift, + ltgts[lt], rc)) { + if (rc->rc_orig_data == NULL) { + rc->rc_orig_data = + zio_buf_alloc(rc->rc_size); + abd_copy_to_buf( + rc->rc_orig_data, + rc->rc_abd, rc->rc_size); + } + rc->rc_need_orig_restore = B_TRUE; + + dead++; + if (c >= vdrz->vd_nparity) + dead_data++; + my_tgts[t++] = c; + zfs_dbgmsg("simulating failure of " + "col %u devidx %u", + c, (int)rc->rc_devidx); + break; } - - ret = code; - goto done; } + } + if (dead > vdrz->vd_nparity) { + /* reconstruction not possible */ + zfs_dbgmsg("reconstruction not possible; " + "too many failures"); + raidz_restore_orig_data(rm); + return (EINVAL); + } + /* XXX 
is rr_code used anywhere? */ + rr->rr_code = 0; + if (dead_data > 0) + rr->rr_code = vdev_raidz_reconstruct_row(rr, + my_tgts, t); + } - /* - * Restore the original data. - */ - for (i = 0; i < n; i++) { - c = tgts[i]; - rc = &rm->rm_col[c]; - abd_copy(rc->rc_abd, orig[i], rc->rc_size); + /* Check for success */ + if (raidz_checksum_verify(zio) == 0) { + + /* Reconstruction succeeded - report errors */ + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_need_orig_restore) { + /* + * Note: if this is a parity column, + * we don't really know if it's wrong. + * We need to let + * vdev_raidz_io_done_verified() check + * it, and if we set rc_error, it will + * think that it is a "known" error + * that doesn't need to be checked + * or corrected. + */ + if (rc->rc_error == 0 && + c >= rr->rr_firstdatacol) { + raidz_checksum_error(zio, + rc, rc->rc_gdata); + rc->rc_error = + SET_ERROR(ECKSUM); + } + rc->rc_need_orig_restore = B_FALSE; + } } - do { + vdev_raidz_io_done_verified(zio, rr); + } + + zio_checksum_verified(zio); + + zfs_dbgmsg("reconstruction successful (checksum verified)"); + return (0); + } + + /* Reconstruction failed - restore original data */ + raidz_restore_orig_data(rm); + zfs_dbgmsg("raidz_reconstruct_expanded(zio=%p) checksum failed", + zio); + return (ECKSUM); +} + +/* + * return 0 on success, ECKSUM on failure + */ +static int +vdev_raidz_combrec(zio_t *zio) +{ + vdev_raidz_t *vdrz = zio->io_vd->vdev_tsd; + + for (int num_failures = 1; num_failures <= vdrz->vd_nparity; + num_failures++) { + int tstore[VDEV_RAIDZ_MAXPARITY + 2]; + int *ltgts = &tstore[1]; /* value is logical child ID */ + + /* Determine number of logical children, n */ + int n = 0; + for (int w = vdrz->vd_physical_width; + w >= vdrz->vd_logical_width; w--) { + n += w; + } + + ASSERT3U(num_failures, <=, vdrz->vd_nparity); + ASSERT3U(num_failures, <=, 
VDEV_RAIDZ_MAXPARITY); + /* handle corner cases in combrec logic */ + ltgts[-1] = -1; + for (int i = 0; i < num_failures; i++) { + ltgts[i] = i; + } + ltgts[num_failures] = n; + + for (;;) { + int err = raidz_reconstruct(zio, + ltgts, num_failures); + if (err == EINVAL) { /* - * Find the next valid column after the curr - * position.. + * Reconstruction not possible with this # + * failures; try more failures. */ - for (next = tgts[curr] + 1; - next < rm->rm_cols && - rm->rm_col[next].rc_error != 0; next++) - continue; + break; + } else if (err == 0) + return (0); + + /* Compute next targets to try */ + for (int t = 0; ; t++) { + ASSERT3U(t, <, num_failures); + ltgts[t]++; + if (ltgts[t] == n) { + ASSERT3U(t, ==, num_failures - 1); + zfs_dbgmsg("reconstruction failed " + "for num_failures=%u; tried all " + "combinations", + num_failures); + break; // try more failures + } - ASSERT(next <= tgts[curr + 1]); + ASSERT3U(ltgts[t], <, n); + ASSERT3U(ltgts[t], <=, ltgts[t + 1]); /* * If that spot is available, we're done here. */ - if (next != tgts[curr + 1]) - break; + if (ltgts[t] != ltgts[t + 1]) + break; // found next combination /* - * Otherwise, find the next valid column after - * the previous position. + * Otherwise, reset this tgt to the minimum, + * and move on to the next tgt. */ - for (c = tgts[curr - 1] + 1; - rm->rm_col[c].rc_error != 0; c++) - continue; - - tgts[curr] = c; - curr++; - - } while (curr != n); + ltgts[t] = ltgts[t - 1] + 1; + ASSERT3U(ltgts[t], ==, t); + } + if (ltgts[num_failures - 1] == n) + break; // try more failures } } - n--; -done: - for (i = 0; i < n; i++) - abd_free(orig[i]); - - return (ret); + zfs_dbgmsg("reconstruction failed for all num_failures"); + return (ECKSUM); } /* - * Complete an IO operation on a RAIDZ VDev + * Complete a write IO operation on a RAIDZ VDev * * Outline: - * - For write operations: * 1. Check for errors on the child IOs. * 2. 
Return, setting an error code if too few child VDevs were written * to reconstruct the data later. Note that partial writes are * considered successful if they can be reconstructed at all. - * - For read operations: - * 1. Check for errors on the child IOs. - * 2. If data errors occurred: - * a. Try to reassemble the data from the parity available. - * b. If we haven't yet read the parity drives, read them now. - * c. If all parity drives have been read but the data still doesn't - * reassemble with a correct checksum, then try combinatorial - * reconstruction. - * d. If that doesn't work, return an error. - * 3. If there were unexpected errors or this is a resilver operation, - * rewrite the vdevs that had errors. */ static void -vdev_raidz_io_done(zio_t *zio) +vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr) +{ + int total_errors = 0; + + ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol); + ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol); + ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); + + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + + if (rc->rc_error) { + ASSERT(rc->rc_error != ECKSUM); /* child has no bp */ + + total_errors++; + } + } + + /* + * XXX -- for now, treat partial writes as a success. + * (If we couldn't write enough columns to reconstruct + * the data, the I/O failed. Otherwise, good enough.) + * + * Now that we support write reallocation, it would be better + * to treat partial failure as real failure unless there are + * no non-degraded top-level vdevs left, and not update DTLs + * if we intend to reallocate. + */ + /* XXPOLICY */ + if (total_errors > rr->rr_firstdatacol) { + zio->io_error = zio_worst_error(zio->io_error, + vdev_raidz_worst_error(rr)); + } +} + +/* + * return 0 if no reconstruction occurred, otherwise the "code" from + * vdev_raidz_reconstruct(). 
+ */ +static int +vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_row_t *rr) { - vdev_t *vd = zio->io_vd; - vdev_t *cvd; - raidz_map_t *rm = zio->io_vsd; - raidz_col_t *rc = NULL; - int unexpected_errors = 0; int parity_errors = 0; int parity_untried = 0; int data_errors = 0; int total_errors = 0; - int n, c; - int tgts[VDEV_RAIDZ_MAXPARITY]; - int code; - - ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */ + int code = 0; - ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol); - ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol); + ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol); + ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol); + ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); - for (c = 0; c < rm->rm_cols; c++) { - rc = &rm->rm_col[c]; + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; if (rc->rc_error) { ASSERT(rc->rc_error != ECKSUM); /* child has no bp */ - if (c < rm->rm_firstdatacol) + if (c < rr->rr_firstdatacol) parity_errors++; else data_errors++; - if (!rc->rc_skipped) - unexpected_errors++; - total_errors++; - } else if (c < rm->rm_firstdatacol && !rc->rc_tried) { + } else if (c < rr->rr_firstdatacol && !rc->rc_tried) { parity_untried++; } } - if (zio->io_type == ZIO_TYPE_WRITE) { - /* - * XXX -- for now, treat partial writes as a success. - * (If we couldn't write enough columns to reconstruct - * the data, the I/O failed. Otherwise, good enough.) - * - * Now that we support write reallocation, it would be better - * to treat partial failure as real failure unless there are - * no non-degraded top-level vdevs left, and not update DTLs - * if we intend to reallocate. - */ - /* XXPOLICY */ - if (total_errors > rm->rm_firstdatacol) - zio->io_error = vdev_raidz_worst_error(rm); - - return; - } - - ASSERT(zio->io_type == ZIO_TYPE_READ); - /* - * There are three potential phases for a read: - * 1. produce valid data from the columns read - * 2. 
read all disks and try again - * 3. perform combinatorial reconstruction - * - * Each phase is progressively both more expensive and less likely to - * occur. If we encounter more errors than we can repair or all phases - * fail, we have no choice but to return an error. - */ - /* - * If the number of errors we saw was correctable -- less than or equal - * to the number of parity disks read -- attempt to produce data that - * has a valid checksum. Naturally, this case applies in the absence of - * any errors. + * If there were data errors and the number of errors we saw was + * correctable -- less than or equal to the number of parity disks read + * -- reconstruct based on the missing data. */ - if (total_errors <= rm->rm_firstdatacol - parity_untried) { - if (data_errors == 0) { - if (raidz_checksum_verify(zio) == 0) { - /* - * If we read parity information (unnecessarily - * as it happens since no reconstruction was - * needed) regenerate and verify the parity. - * We also regenerate parity when resilvering - * so we can write it out to the failed device - * later. - */ - if (parity_errors + parity_untried < - rm->rm_firstdatacol || - (zio->io_flags & ZIO_FLAG_RESILVER)) { - n = raidz_parity_verify(zio, rm); - unexpected_errors += n; - ASSERT(parity_errors + n <= - rm->rm_firstdatacol); - } - goto done; - } - } else { - /* - * We either attempt to read all the parity columns or - * none of them. If we didn't try to read parity, we - * wouldn't be here in the correctable case. There must - * also have been fewer parity errors than parity - * columns or, again, we wouldn't be in this code path. - */ - ASSERT(parity_untried == 0); - ASSERT(parity_errors < rm->rm_firstdatacol); + if (data_errors != 0 && + total_errors <= rr->rr_firstdatacol - parity_untried) { + /* + * We either attempt to read all the parity columns or + * none of them. If we didn't try to read parity, we + * wouldn't be here in the correctable case. 
There must + * also have been fewer parity errors than parity + * columns or, again, we wouldn't be in this code path. + */ + ASSERT(parity_untried == 0); + ASSERT(parity_errors < rr->rr_firstdatacol); - /* - * Identify the data columns that reported an error. - */ - n = 0; - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - rc = &rm->rm_col[c]; - if (rc->rc_error != 0) { - ASSERT(n < VDEV_RAIDZ_MAXPARITY); - tgts[n++] = c; - } + /* + * Identify the data columns that reported an error. + */ + int n = 0; + int tgts[VDEV_RAIDZ_MAXPARITY]; + for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_error != 0) { + ASSERT(n < VDEV_RAIDZ_MAXPARITY); + tgts[n++] = c; } + } - ASSERT(rm->rm_firstdatacol >= n); + ASSERT(rr->rr_firstdatacol >= n); - code = vdev_raidz_reconstruct(rm, tgts, n); + code = vdev_raidz_reconstruct_row(rr, tgts, n); + } - if (raidz_checksum_verify(zio) == 0) { - /* - * If we read more parity disks than were used - * for reconstruction, confirm that the other - * parity disks produced correct data. This - * routine is suboptimal in that it regenerates - * the parity that we already used in addition - * to the parity that we're attempting to - * verify, but this should be a relatively - * uncommon case, and can be optimized if it - * becomes a problem. Note that we regenerate - * parity when resilvering so we can write it - * out to failed devices later. - */ - if (parity_errors < rm->rm_firstdatacol - n || - (zio->io_flags & ZIO_FLAG_RESILVER)) { - n = raidz_parity_verify(zio, rm); - unexpected_errors += n; - ASSERT(parity_errors + n <= - rm->rm_firstdatacol); - } + return (code); +} - goto done; - } - } - } +/* + * return the number of reads issued. + */ +static int +vdev_raidz_read_all(zio_t *zio, raidz_row_t *rr) +{ + vdev_t *vd = zio->io_vd; + int nread = 0; - /* - * This isn't a typical situation -- either we got a read error or - * a child silently returned bad data. 
Read every block so we can - * try again with as much data and parity as we can track down. If - * we've already been through once before, all children will be marked - * as tried so we'll proceed to combinatorial reconstruction. - */ - unexpected_errors = 1; - rm->rm_missingdata = 0; - rm->rm_missingparity = 0; + rr->rr_missingdata = 0; + rr->rr_missingparity = 0; - for (c = 0; c < rm->rm_cols; c++) { - if (rm->rm_col[c].rc_tried) + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_tried || rc->rc_size == 0) continue; - zio_vdev_io_redone(zio); - do { - rc = &rm->rm_col[c]; - if (rc->rc_tried) - continue; - zio_nowait(zio_vdev_child_io(zio, NULL, - vd->vdev_child[rc->rc_devidx], - rc->rc_offset, rc->rc_abd, rc->rc_size, - zio->io_type, zio->io_priority, 0, - vdev_raidz_child_done, rc)); - } while (++c < rm->rm_cols); - - return; + zio_nowait(zio_vdev_child_io(zio, NULL, + vd->vdev_child[rc->rc_devidx], + rc->rc_offset, rc->rc_abd, rc->rc_size, + zio->io_type, zio->io_priority, 0, + vdev_raidz_child_done, rc)); + nread++; } + return (nread); +} - /* - * At this point we've attempted to reconstruct the data given the - * errors we detected, and we've attempted to read all columns. There - * must, therefore, be one or more additional problems -- silent errors - * resulting in invalid data rather than explicit I/O errors resulting - * in absent data. We check if there is enough additional data to - * possibly reconstruct the data and then perform combinatorial - * reconstruction over all possible combinations. If that fails, - * we're cooked. 
- */ - if (total_errors > rm->rm_firstdatacol) { - zio->io_error = vdev_raidz_worst_error(rm); +static void +vdev_raidz_io_done(zio_t *zio) +{ + raidz_map_t *rm = zio->io_vsd; - } else if (total_errors < rm->rm_firstdatacol && - (code = vdev_raidz_combrec(zio, total_errors, data_errors)) != 0) { - /* - * If we didn't use all the available parity for the - * combinatorial reconstruction, verify that the remaining - * parity is correct. - */ - if (code != (1 << rm->rm_firstdatacol) - 1) - (void) raidz_parity_verify(zio, rm); + ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */ + if (zio->io_type == ZIO_TYPE_WRITE) { + for (int i = 0; i < rm->rm_nrows; i++) { + vdev_raidz_io_done_write_impl(zio, rm->rm_row[i]); + } } else { - /* - * We're here because either: - * - * total_errors == rm_first_datacol, or - * vdev_raidz_combrec() failed - * - * In either case, there is enough bad data to prevent - * reconstruction. - * - * Start checksum ereports for all children which haven't - * failed, and the IO wasn't speculative. 
- */ - zio->io_error = SET_ERROR(ECKSUM); - - if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { - for (c = 0; c < rm->rm_cols; c++) { - vdev_t *cvd; - rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_devidx]; - if (rc->rc_error == 0) { - zio_bad_cksum_t zbc; - zbc.zbc_has_cksum = 0; - zbc.zbc_injected = - rm->rm_ecksuminjected; - - mutex_enter(&cvd->vdev_stat_lock); - cvd->vdev_stat.vs_checksum_errors++; - mutex_exit(&cvd->vdev_stat_lock); - - zfs_ereport_start_checksum( - zio->io_spa, cvd, - &zio->io_bookmark, zio, - rc->rc_offset, rc->rc_size, - (void *)(uintptr_t)c, &zbc); + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + rr->rr_code = + vdev_raidz_io_done_reconstruct_known_missing(zio, + rr); + } + + if (raidz_checksum_verify(zio) == 0) { + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + vdev_raidz_io_done_verified(zio, rr); + } + zio_checksum_verified(zio); + } else { + /* + * This isn't a typical situation -- either we got a + * read error or a child silently returned bad data. + * Read every block so we can try again with as much + * data and parity as we can track down. If we've + * already been through once before, all children will + * be marked as tried so we'll proceed to combinatorial + * reconstruction. + */ + int nread = 0; + for (int i = 0; i < rm->rm_nrows; i++) { + nread += vdev_raidz_read_all(zio, + rm->rm_row[i]); + } + if (nread != 0) { + /* + * Normally our stage is VDEV_IO_DONE, but if + * we've already called redone(), it will have + * changed to VDEV_IO_START, in which case we + * don't want to call redone() again. + */ + if (zio->io_stage != ZIO_STAGE_VDEV_IO_START) + zio_vdev_io_redone(zio); + return; + } + /* + * It would be too expensive to try every possible + * combination of failed sectors in every row, so + * instead we try every combination of failed current or + * past physical disk. 
This means that if the incorrect + * sectors were all on Nparity disks at any point in the + * past, we will find the correct data. I think that + * the only case where this is less durable than + * a non-expanded RAIDZ, is if we have a silent + * failure during expansion. In that case, one block + * could be partially in the old format and partially + * in the new format, so we'd lost some sectors + * from the old format and some from the new format. + * + * e.g. logical_width=4 physical_width=6 + * the 15 (6+5+4) possible failed disks are: + * width=6 child=0 + * width=6 child=1 + * width=6 child=2 + * width=6 child=3 + * width=6 child=4 + * width=6 child=5 + * width=5 child=0 + * width=5 child=1 + * width=5 child=2 + * width=5 child=3 + * width=5 child=4 + * width=4 child=0 + * width=4 child=1 + * width=4 child=2 + * width=4 child=3 + * And we will try every combination of Nparity of these + * failing. + * + * As a first pass, we can generate every combo, + * and try reconstructing, ignoring any known + * failures. If any row has too many known + simulated + * failures, then we bail on reconstructing with this + * number of simulated failures. As an improvement, + * we could detect the number of whole known failures + * (i.e. we have known failures on these disks for + * every row; the disks never succeeded), and + * subtract that from the max # failures to simulate. + * We could go even further like the current + * combrec code, but that doesn't seem like it + * gains us very much. If we simulate a failure + * that is also a known failure, that's fine. + */ + if (vdev_raidz_combrec(zio) != 0) { + /* + * We're here because either: + * + * total_errors == rm_first_datacol, or + * vdev_raidz_combrec() failed + * + * In either case, there is enough bad data to + * prevent reconstruction. + * + * Start checksum ereports for all children + * which haven't failed, and the IO wasn't + * speculative. 
+ */ + zio->io_error = SET_ERROR(ECKSUM); + + if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + for (int i = 0; i < rm->rm_nrows; i++) { + raidz_row_t *rr = rm->rm_row[i]; + for (int c = 0; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + if (rc->rc_error == 0) { + zio_bad_cksum_t zbc; + zbc.zbc_has_cksum = 0; + zbc.zbc_injected = + rm->rm_ecksuminjected; + + zfs_ereport_start_checksum( + zio->io_spa, + zio->io_vd->vdev_child[rc->rc_devidx], + &zio->io_bookmark, + zio, rc->rc_offset, rc->rc_size, + (void *)(uintptr_t)c, &zbc); + } + } + } } } } } - -done: - zio_checksum_verified(zio); - - if (zio->io_error == 0 && spa_writeable(zio->io_spa) && - (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER))) { - /* - * Use the good data we have in hand to repair damaged children. - */ - for (c = 0; c < rm->rm_cols; c++) { - rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_devidx]; - - if (rc->rc_error == 0) - continue; - - zio_nowait(zio_vdev_child_io(zio, NULL, cvd, - rc->rc_offset, rc->rc_abd, rc->rc_size, - ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, - ZIO_FLAG_IO_REPAIR | (unexpected_errors ? 
- ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); - } + if (rm->rm_lr != NULL) { + rangelock_exit(rm->rm_lr); + rm->rm_lr = NULL; } } static void vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded) { - if (faulted > vd->vdev_nparity) + vdev_raidz_t *vdrz = vd->vdev_tsd; + if (faulted > vdrz->vd_nparity) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_NO_REPLICAS); else if (degraded + faulted != 0) @@ -2377,6 +2973,7 @@ vdev_raidz_xlate(vdev_t *cvd, const range_seg_t *in, range_seg_t *res) vdev_t *raidvd = cvd->vdev_parent; ASSERT(raidvd->vdev_ops == &vdev_raidz_ops); + /* XXX deal with different logical and physical widths */ uint64_t width = raidvd->vdev_children; uint64_t tgt_col = cvd->vdev_id; uint64_t ashift = raidvd->vdev_top->vdev_ashift; @@ -2402,6 +2999,646 @@ vdev_raidz_xlate(vdev_t *cvd, const range_seg_t *in, range_seg_t *res) ASSERT3U(res->rs_end - res->rs_start, <=, in->rs_end - in->rs_start); } +static void +raidz_reflow_sync(void *arg, dmu_tx_t *tx) +{ + spa_t *spa = arg; + int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; + vdev_raidz_expand_t *vre = spa->spa_raidz_expand; + ASSERT3U(vre->vre_offset_pertxg[txgoff], >=, vre->vre_offset_phys); + + /* + * Ensure there are no i/os to the range that is being committed. + * XXX This might be overkill? + */ + locked_range_t *lr = rangelock_enter(&vre->vre_rangelock, + vre->vre_offset_phys, + vre->vre_offset_pertxg[txgoff] - vre->vre_offset_phys, + RL_WRITER); + /* + * XXX this needs to happen after the txg is synced, for + * purposes of determining if we can overwrite it. + */ + vre->vre_offset_phys = vre->vre_offset_pertxg[txgoff]; + vre->vre_offset_pertxg[txgoff] = 0; + rangelock_exit(lr); + + /* + * vre_offset_phys will be added to the on-disk config by + * vdev_raidz_config_generate(). + * XXX updating the label config every txg, and relying on it + * to be able to read from this RAIDZ, seems not great. 
Should + * we just try both old and new locations until we can read the + * real offset from the MOS? Or rely on ditto blocks? + */ + vdev_t *vd = vdev_lookup_top(spa, vre->vre_vdev_id); + vdev_config_dirty(vd); +} + +static void +raidz_reflow_complete_sync(void *arg, dmu_tx_t *tx) +{ + spa_t *spa = arg; + vdev_raidz_expand_t *vre = spa->spa_raidz_expand; + + for (int i = 0; i < TXG_SIZE; i++) + ASSERT0(vre->vre_offset_pertxg[i]); + + vre->vre_offset_phys = UINT64_MAX; + + /* + * vre_offset_phys will be removed from the on-disk config by + * vdev_raidz_config_generate(). + */ + vdev_t *vd = vdev_lookup_top(spa, vre->vre_vdev_id); + vdev_config_dirty(vd); + + vre->vre_end_time = gethrestime_sec(); + vre->vre_state = DSS_FINISHED; + + uint64_t state = vre->vre_state; + VERIFY0(zap_update(spa->spa_meta_objset, + vd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_STATE, + sizeof (state), 1, &state, tx)); + + uint64_t end_time = vre->vre_end_time; + VERIFY0(zap_update(spa->spa_meta_objset, + vd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME, + sizeof (end_time), 1, &end_time, tx)); + + spa_history_log_internal(spa, "raidz vdev expansion completed", tx, + "%s vdev %llu new width %llu", spa_name(spa), + vd->vdev_id, vd->vdev_children); +} + +/* + * Struct for one copy zio. + */ +typedef struct raidz_reflow_arg { + vdev_raidz_expand_t *rra_vre; + locked_range_t *rra_lr; +} raidz_reflow_arg_t; + +/* + * The write of the new location is done. + */ +static void +raidz_reflow_write_done(zio_t *zio) +{ + raidz_reflow_arg_t *rra = zio->io_private; + vdev_raidz_expand_t *vre = rra->rra_vre; + + abd_free(zio->io_abd); + + mutex_enter(&vre->vre_lock); + ASSERT3U(vre->vre_outstanding_bytes, >=, zio->io_size); + vre->vre_outstanding_bytes -= zio->io_size; + cv_signal(&vre->vre_cv); + mutex_exit(&vre->vre_lock); + + rangelock_exit(rra->rra_lr); + + kmem_free(rra, sizeof (*rra)); + spa_config_exit(zio->io_spa, SCL_STATE, zio->io_spa); +} + +/* + * The read of the old location is done. 
The parent zio is the write to + * the new location. Allow it to start. + */ +static void +raidz_reflow_read_done(zio_t *zio) +{ + zio_nowait(zio_unique_parent(zio)); +} + +static boolean_t +raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, + dmu_tx_t *tx) +{ + spa_t *spa = vd->vdev_spa; + int ashift = vd->vdev_top->vdev_ashift; + range_seg_t *rs = avl_first(&rt->rt_root); + if (rs == NULL) + return (B_FALSE); + uint64_t offset = rs->rs_start; + ASSERT(IS_P2ALIGNED(offset, 1 << ashift)); + ASSERT3U(rs->rs_end - rs->rs_start, >=, 1 << ashift); + uint64_t length = 1 << ashift; + int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; + + uint64_t blkid = offset >> ashift; + + int old_children = vd->vdev_children - 1; + + /* + * Record the fact that we've completed up to the beginning + * of this segment. This is important since there could be + * an unallocated segment preceding this, and the overwrite-check + * code needs to know that we have processed up to this point. + */ + mutex_enter(&vre->vre_lock); + vre->vre_offset = offset; + mutex_exit(&vre->vre_lock); + if (vre->vre_offset > 0 && vre->vre_offset_pertxg[txgoff] == 0) { + dsl_sync_task_nowait(dmu_tx_pool(tx), raidz_reflow_sync, + spa, 0, ZFS_SPACE_CHECK_NONE, tx); + } + vre->vre_offset_pertxg[txgoff] = vre->vre_offset; + + /* + * If this would cause us to overwrite a block whose progress has not + * yet been committed to disk, return TRUE indicating that we need + * to try again in the next txg. 
+ */ + uint64_t overwrite_blkid = + (blkid / vd->vdev_children) * old_children + + (blkid % vd->vdev_children); + /* XXX allow overwrite of first row for now */ + if (blkid > vd->vdev_children && + overwrite_blkid << ashift >= vre->vre_offset_phys) { + zfs_dbgmsg("copying offset %llu, vre_offset_phys %llu, " + "wait for txg %llu", + (long long)offset, + (long long)vre->vre_offset_phys, + (long long)dmu_tx_get_txg(tx)); + return (B_TRUE); + } + + range_tree_remove(rt, offset, length); + + raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra), KM_SLEEP); + rra->rra_vre = vre; + rra->rra_lr = rangelock_enter(&vre->vre_rangelock, + offset, length, RL_WRITER); + + mutex_enter(&vre->vre_lock); + ASSERT3U(vre->vre_offset, <=, offset); + vre->vre_offset = offset + length; + vre->vre_outstanding_bytes += length; + mutex_exit(&vre->vre_lock); + +#if 0 /* XXX already done above */ + if (vre->vre_offset_pertxg[txgoff] == 0) { + dsl_sync_task_nowait(dmu_tx_pool(tx), raidz_reflow_sync, + spa, 0, ZFS_SPACE_CHECK_NONE, tx); + } + vre->vre_offset_pertxg[txgoff] = vre->vre_offset; +#endif + + /* + * SCL_STATE will be released when the read and write are done, + * by raidz_reflow_write_done(). 
+ */ + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + + zio_t *pio = spa->spa_txg_zio[txgoff]; + abd_t *abd = abd_alloc_for_io(length, B_FALSE); + zio_t *write_zio = zio_vdev_child_io(pio, NULL, + vd->vdev_child[blkid % vd->vdev_children], + (blkid / vd->vdev_children) << ashift, + abd, length, + ZIO_TYPE_WRITE, ZIO_PRIORITY_REMOVAL, + ZIO_FLAG_CANFAIL, + raidz_reflow_write_done, rra); + + zio_nowait(zio_vdev_child_io(write_zio, NULL, + vd->vdev_child[blkid % old_children], + (blkid / old_children) << ashift, + abd, length, + ZIO_TYPE_READ, ZIO_PRIORITY_REMOVAL, + ZIO_FLAG_CANFAIL, + raidz_reflow_read_done, rra)); + + return (B_FALSE); +} + +/* ARGSUSED */ +static boolean_t +spa_raidz_expand_cb_check(void *arg, zthr_t *zthr) +{ + spa_t *spa = arg; + + return (spa->spa_raidz_expand != NULL); +} + +/* ARGSUSED */ +static void +spa_raidz_expand_cb(void *arg, zthr_t *zthr) +{ + spa_t *spa = arg; + vdev_raidz_expand_t *vre = spa->spa_raidz_expand; + + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + vdev_t *raidvd = vdev_lookup_top(spa, vre->vre_vdev_id); + + uint64_t guid = raidvd->vdev_guid; + + for (uint64_t i = vre->vre_offset >> raidvd->vdev_ms_shift; + i < raidvd->vdev_ms_count && + !zthr_iscancelled(spa->spa_raidz_expand_zthr); i++) { + metaslab_t *msp = raidvd->vdev_ms[i]; + + metaslab_disable(msp); + mutex_enter(&msp->ms_lock); + + /* + * The metaslab may be newly created (for the expanded + * space), in which case its trees won't exist yet, + * so we need to bail out early. + */ + if (msp->ms_new) { + mutex_exit(&msp->ms_lock); + metaslab_enable(msp, B_FALSE); + continue; + } + + VERIFY0(metaslab_load(msp)); + + /* + * We want to copy everything except the free (allocatable) + * space. Note that there may be a little bit more free + * space (e.g. in ms_defer), and it's fine to copy that too. 
+ */ + range_tree_t *rt = range_tree_create(NULL, NULL); + range_tree_add(rt, msp->ms_start, msp->ms_size); + range_tree_walk(msp->ms_allocatable, range_tree_remove, rt); + mutex_exit(&msp->ms_lock); + + /* + * When we are resuming from a paused expansion (i.e. + * when importing a pool with a expansion in progress), + * discard any state that we have already processed. + */ + range_tree_clear(rt, 0, vre->vre_offset); + + while (!zthr_iscancelled(spa->spa_raidz_expand_zthr) && + !range_tree_is_empty(rt)) { + + /* + * We need to periodically drop the config lock so that + * writers can get in. Additionally, we can't wait + * for a txg to sync while holding a config lock + * (since a waiting writer could cause a 3-way deadlock + * with the sync thread, which also gets a config + * lock for reader). So we can't hold the config lock + * while calling dmu_tx_assign(). + */ + spa_config_exit(spa, SCL_CONFIG, FTAG); + + /* + * This delay will pause the removal around the point + * specified by zfs_remove_max_bytes_pause. We do this + * solely from the test suite or during debugging. + */ + /* XXX change to amount copied? */ + while (zfs_raidz_expand_max_offset_pause <= + vre->vre_offset && + !zthr_iscancelled(spa->spa_raidz_expand_zthr)) + delay(hz); + + mutex_enter(&vre->vre_lock); + while (vre->vre_outstanding_bytes > + zfs_raidz_expand_max_copy_bytes) { + cv_wait(&vre->vre_cv, &vre->vre_lock); + } + + mutex_exit(&vre->vre_lock); + + dmu_tx_t *tx = + dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); + + VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); + uint64_t txg = dmu_tx_get_txg(tx); + + /* + * Reacquire the vdev_config lock. Theoretically, the + * vdev_t that we're expanding may have changed. 
+ */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + raidvd = vdev_lookup_top(spa, vre->vre_vdev_id); + + boolean_t needsync = + raidz_reflow_impl(raidvd, vre, rt, tx); + + dmu_tx_commit(tx); + + if (needsync) { + spa_config_exit(spa, SCL_CONFIG, FTAG); + txg_wait_synced(spa->spa_dsl_pool, txg); + spa_config_enter(spa, SCL_CONFIG, FTAG, + RW_READER); + } + } + + spa_config_exit(spa, SCL_CONFIG, FTAG); + + /* + * XXX If we did a txg sync (at least) once per metaslab, + * (e.g. by passing TRUE to metaslab_enable) + * then we should be able to rely on the triple-dittoing + * of the MOS to ensure we can read the MOS config telling + * us how far we've copied. That's assuming that we are + * able to allocate the different DVA's on different metaslabs. + */ + +#if 0 /* XXX should not be necessary */ + mutex_enter(&vre->vre_lock); + vre->vre_offset = (msp->ms_id + 1) * msp->ms_size; + mutex_exit(&vre->vre_lock); +#endif + + metaslab_enable(msp, B_FALSE); + range_tree_vacate(rt, NULL, NULL); + range_tree_destroy(rt); + + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + raidvd = vdev_lookup_top(spa, vre->vre_vdev_id); + } + + spa_config_exit(spa, SCL_CONFIG, FTAG); + + /* + * Wait for all copy zio's to complete and for all the + * raidz_reflow_sync() synctasks to be run. If we are not being + * canceled, then the reflow must be complete. In that case also + * mark it as completed on disk. 
+ */ + if (!zthr_iscancelled(spa->spa_raidz_expand_zthr)) { + VERIFY0(dsl_sync_task(spa_name(spa), NULL, + raidz_reflow_complete_sync, spa, + 0, ZFS_SPACE_CHECK_NONE)); + (void) vdev_online(spa, guid, ZFS_ONLINE_EXPAND, NULL); + } else { + txg_wait_synced(spa->spa_dsl_pool, 0); + } + + spa->spa_raidz_expand = NULL; +} + +void +spa_start_raidz_expansion_thread(spa_t *spa) +{ + ASSERT3P(spa->spa_raidz_expand_zthr, ==, NULL); + spa->spa_raidz_expand_zthr = zthr_create(spa_raidz_expand_cb_check, + spa_raidz_expand_cb, spa); +} + +void +vdev_raidz_attach_sync(void *arg, dmu_tx_t *tx) +{ + vdev_t *new_child = arg; + spa_t *spa = new_child->vdev_spa; + vdev_t *raidvd = new_child->vdev_parent; + vdev_raidz_t *vdrz = raidvd->vdev_tsd; + ASSERT3P(raidvd->vdev_ops, ==, &vdev_raidz_ops); + ASSERT3P(raidvd->vdev_top, ==, raidvd); + ASSERT3U(raidvd->vdev_children, >, vdrz->vd_logical_width); + ASSERT3U(raidvd->vdev_children, ==, vdrz->vd_physical_width + 1); + ASSERT3P(raidvd->vdev_child[raidvd->vdev_children - 1], ==, + new_child); + + vdrz->vd_physical_width++; + + vdrz->vn_vre.vre_vdev_id = raidvd->vdev_id; + vdrz->vn_vre.vre_offset = 0; + vdrz->vn_vre.vre_offset_phys = 0; + spa->spa_raidz_expand = &vdrz->vn_vre; + zthr_wakeup(spa->spa_raidz_expand_zthr); + + /* Ensure that widths get written to label config */ + vdev_config_dirty(raidvd); + + vdrz->vn_vre.vre_start_time = gethrestime_sec(); + vdrz->vn_vre.vre_end_time = 0; + vdrz->vn_vre.vre_state = DSS_SCANNING; + + uint64_t state = vdrz->vn_vre.vre_state; + VERIFY0(zap_update(spa->spa_meta_objset, + raidvd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_STATE, + sizeof (state), 1, &state, tx)); + + uint64_t start_time = vdrz->vn_vre.vre_start_time; + VERIFY0(zap_update(spa->spa_meta_objset, + raidvd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_START_TIME, + sizeof (start_time), 1, &start_time, tx)); + + (void) zap_remove(spa->spa_meta_objset, + raidvd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME, tx); + + 
spa_history_log_internal(spa, "raidz vdev expansion started", tx, + "%s vdev %llu new width %llu", spa_name(spa), + raidvd->vdev_id, raidvd->vdev_children); +} + +/* + * Add RAIDZ-specific fields to the config nvlist. + * XXX add this to vdev_ops_t? + */ +void +vdev_raidz_config_generate(vdev_t *vd, nvlist_t *nv) +{ + spa_t *spa = vd->vdev_spa; + ASSERT3P(vd->vdev_ops, ==, &vdev_raidz_ops); + vdev_raidz_t *vdrz = vd->vdev_tsd; + + /* + * Make sure someone hasn't managed to sneak a fancy new vdev + * into a crufty old storage pool. + */ + ASSERT(vdrz->vd_nparity == 1 || + (vdrz->vd_nparity <= 2 && + spa_version(spa) >= SPA_VERSION_RAIDZ2) || + (vdrz->vd_nparity <= 3 && + spa_version(spa) >= SPA_VERSION_RAIDZ3)); + + /* + * Note that we'll add these even on storage pools where they + * aren't strictly required -- older software will just ignore + * it. + */ + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vdrz->vd_nparity); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_RAIDZ_LOGICAL_WIDTH, + vdrz->vd_logical_width); + if (vdrz->vn_vre.vre_offset_phys != UINT64_MAX) { + fnvlist_add_uint64(nv, ZPOOL_CONFIG_RAIDZ_EXPAND_OFFSET, + vdrz->vn_vre.vre_offset_phys); + } +} + +/* + * Set RAIDZ-specific fields in the vdev_t, based on the config. + * Can't assume that anything about the vdev_t is already set. + * XXX add this to vdev_ops_t? 
+ */ +void * +vdev_raidz_get_tsd(spa_t *spa, nvlist_t *nv) +{ + uint64_t nparity, lw; + vdev_raidz_t *vdrz = kmem_zalloc(sizeof (*vdrz), KM_SLEEP); + + vdrz->vn_vre.vre_vdev_id = -1; + vdrz->vn_vre.vre_offset = UINT64_MAX; + vdrz->vn_vre.vre_offset_phys = UINT64_MAX; + mutex_init(&vdrz->vn_vre.vre_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&vdrz->vn_vre.vre_cv, NULL, CV_DEFAULT, NULL); + rangelock_init(&vdrz->vn_vre.vre_rangelock, NULL, NULL); + + uint_t children; + nvlist_t **child; + int error = nvlist_lookup_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, &child, &children); + if (error != 0) + goto out; + + vdrz->vd_logical_width = children; + vdrz->vd_physical_width = children; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RAIDZ_LOGICAL_WIDTH, + &lw) == 0) { + vdrz->vd_logical_width = lw; + } + + /* note, the ID does not exist when creating a pool */ + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &vdrz->vn_vre.vre_vdev_id); + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RAIDZ_EXPAND_OFFSET, + &vdrz->vn_vre.vre_offset_phys) == 0) { + vdrz->vn_vre.vre_offset = vdrz->vn_vre.vre_offset_phys; + + /* + * vdev_load() will set spa_raidz_expand. + */ + } + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, + &nparity) == 0) { + if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY) + goto out; + /* + * Previous versions could only support 1 or 2 parity + * device. + */ + if (nparity > 1 && + spa_version(spa) < SPA_VERSION_RAIDZ2) + goto out; + if (nparity > 2 && + spa_version(spa) < SPA_VERSION_RAIDZ3) + goto out; + } else { + /* + * We require the parity to be specified for SPAs that + * support multiple parity levels. + */ + if (spa_version(spa) >= SPA_VERSION_RAIDZ2) + goto out; + /* + * Otherwise, we default to 1 parity device for RAID-Z. 
+ */ + nparity = 1; + } + vdrz->vd_nparity = nparity; + return (vdrz); +out: + kmem_free(vdrz, sizeof (*vdrz)); + return (NULL); +} + +int +vdev_raidz_load(vdev_t *vd) +{ + vdev_raidz_t *vdrz = vd->vdev_tsd; + int err; + + /* + * XXX is it possible for the expansion to have started but + * offset==0 because we haven't made any progress yet? + * + * The offset is stored in the config, so we already have it from + * vdev_raidz_get_tsd(). + */ + if (vdrz->vn_vre.vre_offset != UINT64_MAX) { + ASSERT3U(vdrz->vn_vre.vre_vdev_id, ==, vd->vdev_id); + /* There can only be one expansion at a time. */ + ASSERT0(vd->vdev_spa->spa_raidz_expand); + + vd->vdev_spa->spa_raidz_expand = &vdrz->vn_vre; + } + + uint64_t state = DSS_SCANNING; + err = zap_lookup(vd->vdev_spa->spa_meta_objset, + vd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_STATE, + sizeof (state), 1, &state); + if (err != 0 && err != ENOENT) + return (err); + vdrz->vn_vre.vre_state = (dsl_scan_state_t)state; + + uint64_t start_time = 0; + err = zap_lookup(vd->vdev_spa->spa_meta_objset, + vd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_START_TIME, + sizeof (start_time), 1, &start_time); + if (err != 0 && err != ENOENT) + return (err); + vdrz->vn_vre.vre_start_time = (time_t)start_time; + + uint64_t end_time = 0; + err = zap_lookup(vd->vdev_spa->spa_meta_objset, + vd->vdev_top_zap, VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME, + sizeof (end_time), 1, &end_time); + if (err != 0 && err != ENOENT) + return (err); + vdrz->vn_vre.vre_end_time = (time_t)end_time; + + return (0); +} + +int +spa_raidz_expand_get_stats(spa_t *spa, pool_raidz_expand_stat_t *pres) +{ + vdev_raidz_expand_t *vre = spa->spa_raidz_expand; + + if (vre == NULL) { + /* no removal in progress; find most recent completed */ + for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) { + vdev_t *vd = spa->spa_root_vdev->vdev_child[c]; + if (vd->vdev_ops == &vdev_raidz_ops) { + vdev_raidz_t *vdrz = vd->vdev_tsd; + + if (vdrz->vn_vre.vre_end_time != 0 && + (vre == NULL || + 
vdrz->vn_vre.vre_end_time > + vre->vre_end_time)) { + vre = &vdrz->vn_vre; + } + } + } + } + + if (vre == NULL) { + return (SET_ERROR(ENOENT)); + } + + pres->pres_state = vre->vre_state; + pres->pres_expanding_vdev = vre->vre_vdev_id; + + /* XXX convert this to be bytes copied rather than offset reached */ + vdev_t *vd = vdev_lookup_top(spa, vre->vre_vdev_id); + pres->pres_to_reflow = vd->vdev_asize; + if (pres->pres_state == DSS_FINISHED) { + /* XXX store bytes copied on disk? */ + pres->pres_reflowed = vd->vdev_asize; + } else { + pres->pres_reflowed = vre->vre_offset; + } + + pres->pres_start_time = vre->vre_start_time; + pres->pres_end_time = vre->vre_end_time; + + return (0); +} + + vdev_ops_t vdev_raidz_ops = { .vdev_op_open = vdev_raidz_open, .vdev_op_close = vdev_raidz_close, diff --git a/module/zfs/vdev_raidz_math_impl.h b/module/zfs/vdev_raidz_math_impl.h index ea592c0f12d..bec9506c7d0 100644 --- a/module/zfs/vdev_raidz_math_impl.h +++ b/module/zfs/vdev_raidz_math_impl.h @@ -26,6 +26,7 @@ #define _VDEV_RAIDZ_MATH_IMPL_H #include +#include #define raidz_inline inline __attribute__((always_inline)) #ifndef noinline diff --git a/scripts/raidz_expand_test.sh b/scripts/raidz_expand_test.sh new file mode 100755 index 00000000000..76ac9ce4a0a --- /dev/null +++ b/scripts/raidz_expand_test.sh @@ -0,0 +1,139 @@ +#!/bin/bash -x + +combrec=1 +BASE_DIR=$(dirname "$0")/.. 
+ +echo 1 >/sys/module/zfs/parameters/zfs_prefetch_disable + +zpool destroy test +zpool create filepool sdb + +zfs destroy -R filepool/files +zfs create -o compression=on filepool/files + +dir=/filepool/files + +for (( i=0; i<7; i=i+1 )); do + truncate -s 512M $dir/$i +done + +function wait_completion +{ + while zpool status test | grep "in progress"; do + sleep 5 + done +} + +function dotest +{ + nparity=$1 + + zpool create -o cachefile=none test raidz$nparity $dir/[0-5] + zfs set primarycache=metadata test + + zfs create test/fs + dd if=/dev/urandom of=/test/fs/file bs=1024k count=1 + + zfs create -o compress=on test/fs2 + cp -r $BASE_DIR /test/fs2/ + #truncate -s 100m /test/fs2/file + #/net/pharos/export/home/mahrens/randwritecomp-linux /test/fs2/file 10000 + + zfs create -o compress=on -o recordsize=8k test/fs3 + cp -r $BASE_DIR /test/fs3/ + #truncate -s 100m /test/fs3/file + #/net/pharos/export/home/mahrens/randwritecomp-linux /test/fs3/file 10000 + + zfs snapshot filepool/files@pre-attach + + sum /test/fs/file + sum /test/fs2/file + sum /test/fs3/file + + zfs list test + zpool list -v test + + sleep 2 + + zpool attach test raidz$nparity-0 $dir/6 + + wait_completion + + zfs list test + zpool list -v test + # should indicate new device is present, pool is larger size + + zfs snapshot filepool/files@post-attach + + zpool export test + zpool import -o cachefile=none -d $dir test + + zfs snapshot filepool/files@post-import + + sum /test/fs/file + sum /test/fs2/file + sum /test/fs3/file + zfs list -r test + zpool list -v test + zpool status -v test + zpool scrub test + wait_completion + zpool status -v test + + zpool export test + zpool import -o cachefile=none -d $dir test + + for (( i=0; i<$nparity; i=i+1 )); do + if [[ ! 
$combrec ]]; then + zpool offline test $dir/$i + fi + dd conv=notrunc if=/dev/zero of=$dir/$i bs=1024k seek=4 count=500 + done + sum /test/fs/file + zpool status -v test + + if [[ $combrec ]]; then + zpool scrub test + else + for (( i=0; i<$nparity; i=i+1 )); do + zpool replace -f test $dir/$i + done + fi + wait_completion + zpool status -v test + zpool clear test + + for (( i=$nparity; i<$nparity*2; i=i+1 )); do + if [[ ! $combrec ]]; then + zpool offline test $dir/$i + fi + dd conv=notrunc if=/dev/zero of=$dir/$i bs=1024k seek=4 count=500 + done + # XXX sometimes, scrub was already started + # XXX some READ (not CKSUM) errors reported + zpool status -v test + if [[ $combrec ]]; then + # XXX if scrub already started above, this scrub doesn't seem to repair everything, some + # repairs happen in final scrub + zpool scrub test + else + for (( i=0; i<$nparity; i=i+1 )); do + zpool replace -f test $dir/$i + done + fi + wait_completion + zpool status -v test + zpool clear test + + sum /test/fs3/file + + zpool scrub test + wait_completion + zpool status -v test + + zpool destroy test +} + +dotest 2 +dotest 3 +dotest 1 From c8fb0c7cab56e3ddc66373d74d074aed7b79a5dc Mon Sep 17 00:00:00 2001 From: Thorsten Behrens Date: Tue, 31 Mar 2020 09:36:20 -0400 Subject: [PATCH 325/325] Changes to adjust to renamed functions in zfs 0.8.3 --- cmd/ztest/ztest.c | 5 ++++- module/zfs/spa.c | 24 ++++++++++++------------ module/zfs/vdev_raidz.c | 14 +++++++------- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 082ac4b5fe7..6bb023a5da5 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -7111,7 +7111,10 @@ ztest_run(ztest_shared_t *zs) metaslab_preload_limit = ztest_random(20) + 1; ztest_spa = spa; - VERIFY0(vdev_raidz_impl_set("cycle")); + /* + * BUGBUG - raidz expansion commented out below because raidz math library is excluded from makefile + * VERIFY0(vdev_raidz_impl_set("cycle")); + */ dmu_objset_stats_t dds; 
VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index a5a9c763d1a..c54989c9b05 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -6270,18 +6270,18 @@ if (raidz) { */ vdev_dirty(tvd, VDD_DTL, newvd, txg); - /* - * Schedule the resilver to restart in the future. We do this to - * ensure that dmu_sync-ed blocks have been stitched into the - * respective datasets. We do not do this if resilvers have been - * deferred. - */ - if (dsl_scan_resilvering(spa_get_dsl(spa)) && - spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, newvd); - else - dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); - } + /* + * Schedule the resilver to restart in the future. We do this to + * ensure that dmu_sync-ed blocks have been stitched into the + * respective datasets. We do not do this if resilvers have been + * deferred. + */ + if (dsl_scan_resilvering(spa_get_dsl(spa)) && + spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) + vdev_defer_resilver(newvd); + else + dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg); +} if (spa->spa_bootfs) spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index ee768bd8b70..75ce22bf49a 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2112,7 +2112,7 @@ vdev_raidz_io_start(zio_t *zio) if (vdrz->vd_logical_width != vdrz->vd_physical_width) { /* XXX rangelock not needed after expansion completes */ locked_range_t *lr = - rangelock_enter(&vdrz->vn_vre.vre_rangelock, + zfs_rangelock_enter(&vdrz->vn_vre.vre_rangelock, zio->io_offset, zio->io_size, RL_READER); rm = vdev_raidz_map_alloc_expanded(zio->io_abd, @@ -2911,7 +2911,7 @@ vdev_raidz_io_done(zio_t *zio) } } if (rm->rm_lr != NULL) { - rangelock_exit(rm->rm_lr); + zfs_rangelock_exit(rm->rm_lr); rm->rm_lr = NULL; } } @@ -3011,7 +3011,7 @@ raidz_reflow_sync(void *arg, dmu_tx_t *tx) * Ensure there are no 
i/os to the range that is being committed. * XXX This might be overkill? */ - locked_range_t *lr = rangelock_enter(&vre->vre_rangelock, + locked_range_t *lr = zfs_rangelock_enter(&vre->vre_rangelock, vre->vre_offset_phys, vre->vre_offset_pertxg[txgoff] - vre->vre_offset_phys, RL_WRITER); @@ -3021,7 +3021,7 @@ raidz_reflow_sync(void *arg, dmu_tx_t *tx) */ vre->vre_offset_phys = vre->vre_offset_pertxg[txgoff]; vre->vre_offset_pertxg[txgoff] = 0; - rangelock_exit(lr); + zfs_rangelock_exit(lr); /* * vre_offset_phys will be added to the on-disk config by @@ -3096,7 +3096,7 @@ raidz_reflow_write_done(zio_t *zio) cv_signal(&vre->vre_cv); mutex_exit(&vre->vre_lock); - rangelock_exit(rra->rra_lr); + zfs_rangelock_exit(rra->rra_lr); kmem_free(rra, sizeof (*rra)); spa_config_exit(zio->io_spa, SCL_STATE, zio->io_spa); @@ -3169,7 +3169,7 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra), KM_SLEEP); rra->rra_vre = vre; - rra->rra_lr = rangelock_enter(&vre->vre_rangelock, + rra->rra_lr = zfs_rangelock_enter(&vre->vre_rangelock, offset, length, RL_WRITER); mutex_enter(&vre->vre_lock); @@ -3482,7 +3482,7 @@ vdev_raidz_get_tsd(spa_t *spa, nvlist_t *nv) vdrz->vn_vre.vre_offset_phys = UINT64_MAX; mutex_init(&vdrz->vn_vre.vre_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&vdrz->vn_vre.vre_cv, NULL, CV_DEFAULT, NULL); - rangelock_init(&vdrz->vn_vre.vre_rangelock, NULL, NULL); + zfs_rangelock_init(&vdrz->vn_vre.vre_rangelock, NULL, NULL); uint_t children; nvlist_t **child;