diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 004711ae78c4..2b47d458c1a3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -51,7 +51,7 @@ configure option should be set. This will enable additional correctness checks and all the ASSERTs to help quickly catch potential issues. In addition, there are numerous utilities and debugging files which -provide visibility in to the inner workings of ZFS. The most useful +provide visibility into the inner workings of ZFS. The most useful of these tools are discussed in detail on the [debugging ZFS wiki page](https://github.com/zfsonlinux/zfs/wiki/Debugging). diff --git a/.gitignore b/.gitignore index 549fa59f3822..57867bfc6eab 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ Makefile.in # Top level generated files specific to this top level dir # /bin +/build /configure /config.log /config.status @@ -62,4 +63,3 @@ cscope.* *.orig *.log venv - diff --git a/META b/META index a93750eebd95..4871ede9cf77 100644 --- a/META +++ b/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 0.8.0 +Version: 0.8.3 Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS on Linux -Linux-Maximum: 5.1 +Linux-Maximum: 5.4 Linux-Minimum: 2.6.32 diff --git a/Makefile.am b/Makefile.am index b4416c7492fd..70d9fd7fb6a6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,11 +25,16 @@ EXTRA_DIST += META AUTHORS COPYRIGHT LICENSE NEWS NOTICE README.md EXTRA_DIST += CODE_OF_CONDUCT.md # Include all the extra licensing information for modules -EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE module/icp/algs/skein/THIRDPARTYLICENSE.descrip -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip -EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2 module/spl/THIRDPARTYLICENSE.gplv2.descrip -EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash module/zfs/THIRDPARTYLICENSE.cityhash.descrip +EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE +EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE.descrip +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl +EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip +EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2 +EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2.descrip +EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash +EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash.descrip @CODE_COVERAGE_RULES@ @@ -39,8 +44,9 @@ gitrev: BUILT_SOURCES = gitrev +# Double-colon rules are allowed; there are multiple independent definitions. distclean-local:: - -$(RM) -R autom4te*.cache + -$(RM) -R autom4te*.cache build -find . \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS \ -o -name .pc -o -name .hg -o -name .git \) -prune -o \ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ @@ -52,7 +58,8 @@ distclean-local:: -type f -print | xargs $(RM) all-local: - -${top_srcdir}/scripts/zfs-tests.sh -c + -[ -x ${top_builddir}/scripts/zfs-tests.sh ] && \ + ${top_builddir}/scripts/zfs-tests.sh -c dist-hook: gitrev cp ${top_srcdir}/include/zfs_gitrev.h $(distdir)/include; \ @@ -86,8 +93,8 @@ commitcheck: fi cstyle: - @find ${top_srcdir} -name '*.[hc]' ! -name 'zfs_config.*' \ - ! 
-name '*.mod.c' -type f \ + @find ${top_srcdir} -name build -prune -o -name '*.[hc]' \ + ! -name 'zfs_config.*' ! -name '*.mod.c' -type f \ -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+ shellcheck: @@ -111,9 +118,10 @@ mancheck: fi testscheck: - @find ${top_srcdir}/tests/zfs-tests/tests -type f \ + @find ${top_srcdir}/tests/zfs-tests -type f \ \( -name '*.ksh' -not -executable \) -o \ \( -name '*.kshlib' -executable \) -o \ + \( -name '*.shlib' -executable \) -o \ \( -name '*.cfg' -executable \) | \ xargs -r stat -c '%A %n' | \ awk '{c++; print} END {if(c>0) exit 1}' diff --git a/cmd/Makefile.am b/cmd/Makefile.am index a9d1ef438687..f1ade0390b57 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,3 +1,8 @@ SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest -SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat dbufstat zed -SUBDIRS += arc_summary zgenhostid +SUBDIRS += fsck_zfs vdev_id zgenhostid + +if USING_PYTHON +SUBDIRS += arcstat arc_summary dbufstat +endif + +SUBDIRS += mount_zfs zed zvol_id zvol_wait diff --git a/cmd/arc_summary/Makefile.am b/cmd/arc_summary/Makefile.am index a83edffadcb9..7d83624d66d3 100644 --- a/cmd/arc_summary/Makefile.am +++ b/cmd/arc_summary/Makefile.am @@ -4,9 +4,7 @@ if USING_PYTHON_2 dist_bin_SCRIPTS = arc_summary2 install-exec-hook: mv $(DESTDIR)$(bindir)/arc_summary2 $(DESTDIR)$(bindir)/arc_summary -endif - -if USING_PYTHON_3 +else dist_bin_SCRIPTS = arc_summary3 install-exec-hook: mv $(DESTDIR)$(bindir)/arc_summary3 $(DESTDIR)$(bindir)/arc_summary diff --git a/cmd/arc_summary/arc_summary2 b/cmd/arc_summary/arc_summary2 index ab4a3c574a5f..1326d9e627f8 100755 --- a/cmd/arc_summary/arc_summary2 +++ b/cmd/arc_summary/arc_summary2 @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/env python2 # # $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $ # diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 index fc5e1e4b64c1..e9890bf21e69 100755 --- a/cmd/arc_summary/arc_summary3 +++ b/cmd/arc_summary/arc_summary3 @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright (c) 2008 Ben Rockwood , # Copyright (c) 2010 Martin Matuska , @@ -43,7 +43,7 @@ import subprocess import sys import time -DECRIPTION = 'Print ARC and other statistics for ZFS on Linux' +DESCRIPTION = 'Print ARC and other statistics for ZFS on Linux' INDENT = ' '*8 LINE_LENGTH = 72 PROC_PATH = '/proc/spl/kstat/zfs/' @@ -65,7 +65,7 @@ SECTION_PATHS = {'arc': 'arcstats', 'zfetch': 'zfetchstats', 'zil': 'zil'} -parser = argparse.ArgumentParser(description=DECRIPTION) +parser = argparse.ArgumentParser(description=DESCRIPTION) parser.add_argument('-a', '--alternate', action='store_true', default=False, help='use alternate formatting for tunables and SPL', dest='alt') @@ -284,7 +284,7 @@ def get_spl_tunables(PATH): def get_descriptions(request): - """Get the decriptions of the Solaris Porting Layer (SPL) or the + """Get the descriptions of the Solaris Porting Layer (SPL) or the tunables, return with minimal formatting. 
""" @@ -677,10 +677,10 @@ def section_l2arc(kstats_dict): prt_1('L2ARC breakdown:', f_hits(l2_access_total)) prt_i2('Hit ratio:', f_perc(arc_stats['l2_hits'], l2_access_total), - f_bytes(arc_stats['l2_hits'])) + f_hits(arc_stats['l2_hits'])) prt_i2('Miss ratio:', f_perc(arc_stats['l2_misses'], l2_access_total), - f_bytes(arc_stats['l2_misses'])) + f_hits(arc_stats['l2_misses'])) prt_i1('Feeds:', f_hits(arc_stats['l2_feeds'])) print() @@ -708,7 +708,7 @@ def section_l2arc(kstats_dict): def section_spl(*_): """Print the SPL parameters, if requested with alternative format - and/or decriptions. This does not use kstats. + and/or descriptions. This does not use kstats. """ spls = get_spl_tunables(SPL_PATH) @@ -725,7 +725,7 @@ def section_spl(*_): try: print(INDENT+'#', descriptions[key]) except KeyError: - print(INDENT+'# (No decription found)') # paranoid + print(INDENT+'# (No description found)') # paranoid print(format_raw_line(key, value)) @@ -734,7 +734,7 @@ def section_spl(*_): def section_tunables(*_): """Print the tunables, if requested with alternative format and/or - decriptions. This does not use kstasts. + descriptions. This does not use kstasts. """ tunables = get_spl_tunables(TUNABLES_PATH) @@ -751,7 +751,7 @@ def section_tunables(*_): try: print(INDENT+'#', descriptions[key]) except KeyError: - print(INDENT+'# (No decription found)') # paranoid + print(INDENT+'# (No description found)') # paranoid print(format_raw_line(key, value)) diff --git a/cmd/arcstat/Makefile.am b/cmd/arcstat/Makefile.am index 462e9a6197a8..8166778a13e3 100644 --- a/cmd/arcstat/Makefile.am +++ b/cmd/arcstat/Makefile.am @@ -1,13 +1,13 @@ dist_bin_SCRIPTS = arcstat # -# The arcstat script is compatibile with both Python 2.6 and 3.4. +# The arcstat script is compatible with both Python 2.6 and 3.4. # As such the python 3 shebang can be replaced at install time when # targeting a python 2 system. This allows us to maintain a single # version of the source. # if USING_PYTHON_2 install-exec-hook: - sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \ + sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \ $(DESTDIR)$(bindir)/arcstat endif diff --git a/cmd/arcstat/arcstat b/cmd/arcstat/arcstat index 57a2d621f34a..003499928f32 100755 --- a/cmd/arcstat/arcstat +++ b/cmd/arcstat/arcstat @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Print out ZFS ARC Statistics exported via kstat(1) # For a definition of fields, or usage, use arctstat.pl -v diff --git a/cmd/dbufstat/Makefile.am b/cmd/dbufstat/Makefile.am index 968a7607797f..a3f0c6e50d73 100644 --- a/cmd/dbufstat/Makefile.am +++ b/cmd/dbufstat/Makefile.am @@ -1,13 +1,13 @@ dist_bin_SCRIPTS = dbufstat # -# The dbufstat script is compatibile with both Python 2.6 and 3.4. +# The dbufstat script is compatible with both Python 2.6 and 3.4. # As such the python 3 shebang can be replaced at install time when # targeting a python 2 system. This allows us to maintain a single # version of the source. # if USING_PYTHON_2 install-exec-hook: - sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \ + sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \ $(DESTDIR)$(bindir)/dbufstat endif diff --git a/cmd/dbufstat/dbufstat b/cmd/dbufstat/dbufstat index e6c947fbcbdb..4a57d811301f 100755 --- a/cmd/dbufstat/dbufstat +++ b/cmd/dbufstat/dbufstat @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Print out statistics for all cached dmu buffers. 
This information # is available through the dbufs kstat and may be post-processed as diff --git a/cmd/fsck_zfs/fsck.zfs b/cmd/fsck_zfs/fsck.zfs index f1685db6527b..129a7f39c388 100755 --- a/cmd/fsck_zfs/fsck.zfs +++ b/cmd/fsck_zfs/fsck.zfs @@ -1,6 +1,6 @@ #!/bin/sh # -# fsck.zfs: A fsck helper to accomidate distributions that expect +# fsck.zfs: A fsck helper to accommodate distributions that expect # to be able to execute a fsck on all filesystem types. Currently # this script does nothing but it could be extended to act as a # compatibility wrapper for 'zpool scrub'. diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c index a9b1e166b4b0..a37dd6f53a16 100644 --- a/cmd/mount_zfs/mount_zfs.c +++ b/cmd/mount_zfs/mount_zfs.c @@ -489,7 +489,7 @@ main(int argc, char **argv) zfsutil = 1; if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (MOUNT_SYSERR); } diff --git a/cmd/vdev_id/vdev_id b/cmd/vdev_id/vdev_id index 3796ab4885d8..a79aed3b5d82 100755 --- a/cmd/vdev_id/vdev_id +++ b/cmd/vdev_id/vdev_id @@ -102,7 +102,7 @@ Usage: vdev_id [-h] vdev_id <-d device> [-c config_file] [-p phys_per_port] [-g sas_direct|sas_switch|scsi] [-m] - -c specify name of alernate config file [default=$CONFIG] + -c specify name of an alternative config file [default=$CONFIG] -d specify basename of device (i.e. sda) -e Create enclose device symlinks only (/dev/by-enclosure) -g Storage network topology [default="$TOPOLOGY"] diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 4b07cdb8e0cb..0182c79720b5 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -26,6 +26,7 @@ * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. * Copyright (c) 2015, 2017, Intel Corporation. + * Copyright (c) 2019 Datto Inc. 
*/ #include @@ -109,7 +110,7 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); uint64_t *zopt_object = NULL; static unsigned zopt_objects = 0; -uint64_t max_inflight = 1000; +uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */ static int leaked_objects = 0; static range_tree_t *mos_refd_objs; @@ -1449,6 +1450,12 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx", + (u_longlong_t)bp->blk_cksum.zc_word[0], + (u_longlong_t)bp->blk_cksum.zc_word[1], + (u_longlong_t)bp->blk_cksum.zc_word[2], + (u_longlong_t)bp->blk_cksum.zc_word[3]); } } @@ -2408,7 +2415,7 @@ static const char *objset_types[DMU_OST_NUMTYPES] = { static void dump_dir(objset_t *os) { - dmu_objset_stats_t dds; + dmu_objset_stats_t dds = { 0 }; uint64_t object, object_count; uint64_t refdbytes, usedobjs, scratch; char numbuf[32]; @@ -3449,7 +3456,7 @@ zdb_blkptr_done(zio_t *zio) abd_free(zio->io_abd); mutex_enter(&spa->spa_scrub_lock); - spa->spa_load_verify_ios--; + spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); cv_broadcast(&spa->spa_scrub_io_cv); if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { @@ -3520,9 +3527,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, flags |= ZIO_FLAG_SPECULATIVE; mutex_enter(&spa->spa_scrub_lock); - while (spa->spa_load_verify_ios > max_inflight) + while (spa->spa_load_verify_bytes > max_inflight_bytes) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); - spa->spa_load_verify_ios++; + spa->spa_load_verify_bytes += size; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(NULL, spa, bp, abd, size, @@ -4285,6 +4292,7 @@ dump_block_stats(spa_t *spa) ZIO_FLAG_GODFATHER); } } + ASSERT0(spa->spa_load_verify_bytes); /* * Done after zio_wait() since zcb_haderrors is modified in @@ -4778,7 +4786,7 @@ zdb_set_skip_mmp(char *target) * the name of the target pool. * * Note that the checkpointed state's pool name will be the name of - * the original pool with the above suffix appened to it. In addition, + * the original pool with the above suffix appended to it. In addition, * if the target is not a pool name (e.g. a path to a dataset) then * the new_path parameter is populated with the updated path to * reflect the fact that we are looking into the checkpointed state. @@ -5445,9 +5453,9 @@ dump_zpool(spa_t *spa) #define ZDB_FLAG_BSWAP 0x0004 #define ZDB_FLAG_GBH 0x0008 #define ZDB_FLAG_INDIRECT 0x0010 -#define ZDB_FLAG_PHYS 0x0020 -#define ZDB_FLAG_RAW 0x0040 -#define ZDB_FLAG_PRINT_BLKPTR 0x0080 +#define ZDB_FLAG_RAW 0x0020 +#define ZDB_FLAG_PRINT_BLKPTR 0x0040 +#define ZDB_FLAG_VERBOSE 0x0080 static int flagbits[256]; @@ -5578,11 +5586,30 @@ zdb_vdev_lookup(vdev_t *vdev, const char *path) return (NULL); } +static boolean_t +zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize) +{ + char *s0, *s1; + + if (sizes == NULL) + return (B_FALSE); + + s0 = strtok(sizes, "/"); + if (s0 == NULL) + return (B_FALSE); + s1 = strtok(NULL, "/"); + *lsize = strtoull(s0, NULL, 16); + *psize = s1 ? strtoull(s1, NULL, 16) : *lsize; + return (*lsize >= *psize && *psize > 0); +} + +#define ZIO_COMPRESS_MASK(alg) (1ULL << (ZIO_COMPRESS_##alg)) + /* * Read a block from a pool and print it out. 
The syntax of the * block descriptor is: * - * pool:vdev_specifier:offset:size[:flags] + * pool:vdev_specifier:offset:[lsize/]psize[:flags] * * pool - The name of the pool you wish to read from * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) @@ -5590,15 +5617,14 @@ zdb_vdev_lookup(vdev_t *vdev, const char *path) * size - Amount of data to read, in hex, in bytes * flags - A string of characters specifying options * b: Decode a blkptr at given offset within block - * *c: Calculate and display checksums + * c: Calculate and display checksums * d: Decompress data before dumping * e: Byteswap data before dumping * g: Display data as a gang block header * i: Display as an indirect block - * p: Do I/O to physical offset * r: Dump raw data to stdout + * v: Verbose * - * * = not yet implemented */ static void zdb_read_block(char *thing, spa_t *spa) @@ -5606,13 +5632,12 @@ zdb_read_block(char *thing, spa_t *spa) blkptr_t blk, *bp = &blk; dva_t *dva = bp->blk_dva; int flags = 0; - uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; + uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0; zio_t *zio; vdev_t *vd; abd_t *pabd; void *lbuf, *buf; - const char *s, *vdev; - char *p, *dup, *flagstr; + char *s, *p, *dup, *vdev, *flagstr, *sizes; int i, error; boolean_t borrowed = B_FALSE; @@ -5621,18 +5646,14 @@ zdb_read_block(char *thing, spa_t *spa) vdev = s ? s : ""; s = strtok(NULL, ":"); offset = strtoull(s ? s : "", NULL, 16); + sizes = strtok(NULL, ":"); s = strtok(NULL, ":"); - size = strtoull(s ? s : "", NULL, 16); - s = strtok(NULL, ":"); - if (s) - flagstr = strdup(s); - else - flagstr = strdup(""); + flagstr = strdup(s ? s : ""); s = NULL; - if (size == 0) - s = "size must not be zero"; - if (!IS_P2ALIGNED(size, DEV_BSIZE)) + if (!zdb_parse_block_sizes(sizes, &lsize, &psize)) + s = "invalid size(s)"; + if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE)) s = "size must be a multiple of sector size"; if (!IS_P2ALIGNED(offset, DEV_BSIZE)) s = "offset must be a multiple of sector size"; @@ -5688,9 +5709,6 @@ zdb_read_block(char *thing, spa_t *spa) vd->vdev_ops->vdev_op_type); } - psize = size; - lsize = size; - pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); @@ -5747,30 +5765,41 @@ zdb_read_block(char *thing, spa_t *spa) * We don't know how the data was compressed, so just try * every decompress function at every inflated blocksize. */ - enum zio_compress c; void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); + int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 }; + int *cfuncp = cfuncs; + uint64_t maxlsize = SPA_MAXBLOCKSIZE; + uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) | + ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) | + (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0); + *cfuncp++ = ZIO_COMPRESS_LZ4; + *cfuncp++ = ZIO_COMPRESS_LZJB; + mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); + for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) + if (((1ULL << c) & mask) == 0) + *cfuncp++ = c; /* - * XXX - On the one hand, with SPA_MAXBLOCKSIZE at 16MB, - * this could take a while and we should let the user know + * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this + * could take a while and we should let the user know * we are not stuck. On the other hand, printing progress - * info gets old after a while. What to do? + * info gets old after a while. User can specify 'v' flag + * to see the progression. 
*/ - for (lsize = psize + SPA_MINBLOCKSIZE; - lsize <= SPA_MAXBLOCKSIZE; lsize += SPA_MINBLOCKSIZE) { - for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { - /* - * ZLE can easily decompress non zle stream. - * So have an option to disable it. - */ - if (c == ZIO_COMPRESS_ZLE && - getenv("ZDB_NO_ZLE")) - continue; - - (void) fprintf(stderr, - "Trying %05llx -> %05llx (%s)\n", - (u_longlong_t)psize, (u_longlong_t)lsize, - zio_compress_table[c].ci_name); + if (lsize == psize) + lsize += SPA_MINBLOCKSIZE; + else + maxlsize = lsize; + for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) { + for (cfuncp = cfuncs; *cfuncp; cfuncp++) { + if (flags & ZDB_FLAG_VERBOSE) { + (void) fprintf(stderr, + "Trying %05llx -> %05llx (%s)\n", + (u_longlong_t)psize, + (u_longlong_t)lsize, + zio_compress_table[*cfuncp].\ + ci_name); + } /* * We randomize lbuf2, and decompress to both @@ -5779,27 +5808,30 @@ zdb_read_block(char *thing, spa_t *spa) */ VERIFY0(random_get_pseudo_bytes(lbuf2, lsize)); - if (zio_decompress_data(c, pabd, + if (zio_decompress_data(*cfuncp, pabd, lbuf, psize, lsize) == 0 && - zio_decompress_data(c, pabd, + zio_decompress_data(*cfuncp, pabd, lbuf2, psize, lsize) == 0 && bcmp(lbuf, lbuf2, lsize) == 0) break; } - if (c != ZIO_COMPRESS_FUNCTIONS) + if (*cfuncp != 0) break; } umem_free(lbuf2, SPA_MAXBLOCKSIZE); - if (lsize > SPA_MAXBLOCKSIZE) { + if (lsize > maxlsize) { (void) printf("Decompress of %s failed\n", thing); goto out; } buf = lbuf; - size = lsize; + if (*cfuncp == ZIO_COMPRESS_ZLE) { + printf("\nZLE decompression was selected. If you " + "suspect the results are wrong,\ntry avoiding ZLE " + "by setting and exporting ZDB_NO_ZLE=\"true\"\n"); + } } else { - size = psize; - buf = abd_borrow_buf_copy(pabd, size); + buf = abd_borrow_buf_copy(pabd, lsize); borrowed = B_TRUE; } @@ -5807,17 +5839,78 @@ zdb_read_block(char *thing, spa_t *spa) zdb_print_blkptr((blkptr_t *)(void *) ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); else if (flags & ZDB_FLAG_RAW) - zdb_dump_block_raw(buf, size, flags); + zdb_dump_block_raw(buf, lsize, flags); else if (flags & ZDB_FLAG_INDIRECT) - zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), + zdb_dump_indirect((blkptr_t *)buf, lsize / sizeof (blkptr_t), flags); else if (flags & ZDB_FLAG_GBH) zdb_dump_gbh(buf, flags); else - zdb_dump_block(thing, buf, size, flags); + zdb_dump_block(thing, buf, lsize, flags); + + /* + * If :c was specified, iterate through the checksum table to + * calculate and display each checksum for our specified + * DVA and length. 
+ */ + if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) && + !(flags & ZDB_FLAG_GBH)) { + zio_t *czio, *cio; + (void) printf("\n"); + for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL; + ck < ZIO_CHECKSUM_FUNCTIONS; ck++) { + + if ((zio_checksum_table[ck].ci_flags & + ZCHECKSUM_FLAG_EMBEDDED) || + ck == ZIO_CHECKSUM_NOPARITY) { + continue; + } + BP_SET_CHECKSUM(bp, ck); + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + czio->io_bp = bp; + + if (vd == vd->vdev_top) { + cio = zio_read(czio, spa, bp, pabd, psize, + NULL, NULL, + ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | + ZIO_FLAG_DONT_RETRY, NULL); + zio_nowait(cio); + } else { + zio_nowait(zio_vdev_child_io(czio, bp, vd, + offset, pabd, psize, ZIO_TYPE_READ, + ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_DONT_CACHE | + ZIO_FLAG_DONT_PROPAGATE | + ZIO_FLAG_DONT_RETRY | + ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | + ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_OPTIONAL, NULL, NULL)); + } + error = zio_wait(czio); + if (error == 0 || error == ECKSUM) { + zio_t *ck_zio = zio_root(spa, NULL, NULL, 0); + ck_zio->io_offset = + DVA_GET_OFFSET(&bp->blk_dva[0]); + ck_zio->io_bp = bp; + zio_checksum_compute(ck_zio, ck, pabd, lsize); + printf("%12s\tcksum=%llx:%llx:%llx:%llx\n", + zio_checksum_table[ck].ci_name, + (u_longlong_t)bp->blk_cksum.zc_word[0], + (u_longlong_t)bp->blk_cksum.zc_word[1], + (u_longlong_t)bp->blk_cksum.zc_word[2], + (u_longlong_t)bp->blk_cksum.zc_word[3]); + zio_wait(ck_zio); + } else { + printf("error %d reading block\n", error); + } + spa_config_exit(spa, SCL_STATE, FTAG); + } + } if (borrowed) - abd_return_buf_copy(pabd, buf, size); + abd_return_buf_copy(pabd, buf, lsize); out: abd_free(pabd); @@ -5933,10 +6026,10 @@ main(int argc, char **argv) break; /* NB: Sort single match options below. 
*/ case 'I': - max_inflight = strtoull(optarg, NULL, 0); - if (max_inflight == 0) { + max_inflight_bytes = strtoull(optarg, NULL, 0); + if (max_inflight_bytes == 0) { (void) fprintf(stderr, "maximum number " - "of inflight I/Os must be greater " + "of inflight bytes must be greater " "than 0\n"); usage(); } @@ -6232,8 +6325,8 @@ main(int argc, char **argv) flagbits['e'] = ZDB_FLAG_BSWAP; flagbits['g'] = ZDB_FLAG_GBH; flagbits['i'] = ZDB_FLAG_INDIRECT; - flagbits['p'] = ZDB_FLAG_PHYS; flagbits['r'] = ZDB_FLAG_RAW; + flagbits['v'] = ZDB_FLAG_VERBOSE; for (int i = 0; i < argc; i++) zdb_read_block(argv[i], spa); diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 9c11315f2a58..fb479f9b5c79 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -1,12 +1,11 @@ +SUBDIRS = zed.d + include $(top_srcdir)/config/Rules.am DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include -EXTRA_DIST = zed.d/README \ - zed.d/history_event-zfs-list-cacher.sh.in - sbin_PROGRAMS = zed ZED_SRC = \ @@ -47,55 +46,3 @@ zed_LDADD = \ zed_LDADD += -lrt zed_LDFLAGS = -pthread - -zedconfdir = $(sysconfdir)/zfs/zed.d - -dist_zedconf_DATA = \ - zed.d/zed-functions.sh \ - zed.d/zed.rc - -zedexecdir = $(zfsexecdir)/zed.d - -dist_zedexec_SCRIPTS = \ - zed.d/all-debug.sh \ - zed.d/all-syslog.sh \ - zed.d/data-notify.sh \ - zed.d/generic-notify.sh \ - zed.d/resilver_finish-notify.sh \ - zed.d/scrub_finish-notify.sh \ - zed.d/statechange-led.sh \ - zed.d/statechange-notify.sh \ - zed.d/vdev_clear-led.sh \ - zed.d/vdev_attach-led.sh \ - zed.d/pool_import-led.sh \ - zed.d/resilver_finish-start-scrub.sh - -nodist_zedexec_SCRIPTS = zed.d/history_event-zfs-list-cacher.sh - -$(nodist_zedexec_SCRIPTS): %: %.in - -$(SED) -e 's,@bindir\@,$(bindir),g' \ - -e 's,@runstatedir\@,$(runstatedir),g' \ - -e 's,@sbindir\@,$(sbindir),g' \ - -e 's,@sysconfdir\@,$(sysconfdir),g' \ - $< >'$@' - -zedconfdefaults = \ - all-syslog.sh \ - data-notify.sh \ - resilver_finish-notify.sh \ - scrub_finish-notify.sh \ - statechange-led.sh \ - statechange-notify.sh \ - vdev_clear-led.sh \ - vdev_attach-led.sh \ - pool_import-led.sh \ - resilver_finish-start-scrub.sh - -install-data-hook: - $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" - for f in $(zedconfdefaults); do \ - test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ - -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ - ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ - done - chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/agents/fmd_api.c b/cmd/zed/agents/fmd_api.c index ae90a322cf90..607b387ca3a8 100644 --- a/cmd/zed/agents/fmd_api.c +++ b/cmd/zed/agents/fmd_api.c @@ -25,7 +25,7 @@ */ /* - * This file imlements the minimal FMD module API required to support the + * This file implements the minimal FMD module API required to support the * fault logic modules in ZED. This support includes module registration, * memory allocation, module property accessors, basic case management, * one-shot timers and SERD engines. 
diff --git a/cmd/zed/agents/fmd_serd.c b/cmd/zed/agents/fmd_serd.c index 043552862e82..d4ec37fb7691 100644 --- a/cmd/zed/agents/fmd_serd.c +++ b/cmd/zed/agents/fmd_serd.c @@ -281,7 +281,7 @@ fmd_serd_eng_empty(fmd_serd_eng_t *sgp) void fmd_serd_eng_reset(fmd_serd_eng_t *sgp) { - serd_log_msg(" SERD Engine: reseting %s", sgp->sg_name); + serd_log_msg(" SERD Engine: resetting %s", sgp->sg_name); while (sgp->sg_count != 0) fmd_serd_eng_discard(sgp, list_head(&sgp->sg_list)); diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 6d392604bceb..006e0ab99f47 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -116,7 +116,8 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) /* * On a devid match, grab the vdev guid and expansion time, if any. */ - if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && + if (gsp->gs_devid != NULL && + (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && (strcmp(gsp->gs_devid, path) == 0)) { (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &gsp->gs_vdev_guid); diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 6d3e7cb11250..d980794d0a57 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -157,7 +157,7 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data) * 1. physical match with no fs, no partition * tag it top, partition disk * - * 2. physical match again, see partion and tag + * 2. physical match again, see partition and tag * */ @@ -674,7 +674,7 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi) devid, devpath ? devpath : "NULL", is_slice); /* - * Iterate over all vdevs looking for a match in the folllowing order: + * Iterate over all vdevs looking for a match in the following order: * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk) * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location). 
* @@ -892,7 +892,7 @@ zfs_enum_pools(void *arg) * * sent messages from zevents or udev monitor * - * For now, each agent has it's own libzfs instance + * For now, each agent has its own libzfs instance */ int zfs_slm_init() diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am new file mode 100644 index 000000000000..716db2b2f215 --- /dev/null +++ b/cmd/zed/zed.d/Makefile.am @@ -0,0 +1,57 @@ +include $(top_srcdir)/config/Rules.am + +EXTRA_DIST = \ + README \ + history_event-zfs-list-cacher.sh.in + +zedconfdir = $(sysconfdir)/zfs/zed.d + +dist_zedconf_DATA = \ + zed-functions.sh \ + zed.rc + +zedexecdir = $(zfsexecdir)/zed.d + +dist_zedexec_SCRIPTS = \ + all-debug.sh \ + all-syslog.sh \ + data-notify.sh \ + generic-notify.sh \ + resilver_finish-notify.sh \ + scrub_finish-notify.sh \ + statechange-led.sh \ + statechange-notify.sh \ + vdev_clear-led.sh \ + vdev_attach-led.sh \ + pool_import-led.sh \ + resilver_finish-start-scrub.sh + +nodist_zedexec_SCRIPTS = history_event-zfs-list-cacher.sh + +$(nodist_zedexec_SCRIPTS): %: %.in + -$(SED) -e 's,@bindir\@,$(bindir),g' \ + -e 's,@runstatedir\@,$(runstatedir),g' \ + -e 's,@sbindir\@,$(sbindir),g' \ + -e 's,@sysconfdir\@,$(sysconfdir),g' \ + $< >'$@' + +zedconfdefaults = \ + all-syslog.sh \ + data-notify.sh \ + resilver_finish-notify.sh \ + scrub_finish-notify.sh \ + statechange-led.sh \ + statechange-notify.sh \ + vdev_clear-led.sh \ + vdev_attach-led.sh \ + pool_import-led.sh \ + resilver_finish-start-scrub.sh + +install-data-hook: + $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" + for f in $(zedconfdefaults); do \ + test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ + -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ + ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ + done + chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in index c1513cf3a01f..6d0f44ab3260 100755 --- a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in +++ b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in @@ -47,7 +47,7 @@ case "${ZEVENT_HISTORY_INTERNAL_NAME}" in # Only act if one of the tracked properties is altered. case "${ZEVENT_HISTORY_INTERNAL_STR%%=*}" in canmount|mountpoint|atime|relatime|devices|exec| \ - readonly|setuid|nbmand) ;; + readonly|setuid|nbmand|encroot|keylocation) ;; *) exit 0 ;; esac ;; @@ -62,7 +62,7 @@ zed_lock zfs-list trap abort_alter EXIT PROPS="name,mountpoint,canmount,atime,relatime,devices,exec,readonly" -PROPS="${PROPS},setuid,nbmand" +PROPS="${PROPS},setuid,nbmand,encroot,keylocation" "${ZFS}" list -H -t filesystem -o $PROPS -r "${ZEVENT_POOL}" > "${FSLIST_TMP}" diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh index 6484b79592aa..e656e125d378 100755 --- a/cmd/zed/zed.d/statechange-led.sh +++ b/cmd/zed/zed.d/statechange-led.sh @@ -20,7 +20,7 @@ # # Exit codes: # 0: enclosure led successfully set -# 1: enclosure leds not not available +# 1: enclosure leds not available # 2: enclosure leds administratively disabled # 3: The led sysfs path passed from ZFS does not exist # 4: $ZPOOL not set @@ -68,7 +68,7 @@ check_and_set_led() # timeout. for _ in $(seq 1 5); do # We want to check the current state first, since writing to the - # 'fault' entry always always causes a SES command, even if the + # 'fault' entry always causes a SES command, even if the # current state is already what you want. 
current=$(cat "${file}") diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 2d97988a028b..fa1c6aa30283 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -28,6 +28,8 @@ * Copyright 2016 Igor Kozhukhov . * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2019, loli10K + * Copyright 2019 Joyent, Inc. */ #include @@ -991,10 +993,11 @@ zfs_do_create(int argc, char **argv) zpool_close(zpool_handle); goto error; } - zpool_close(zpool_handle); - volsize = zvol_volsize_to_reservation(volsize, real_props); + volsize = zvol_volsize_to_reservation(zpool_handle, volsize, + real_props); nvlist_free(real_props); + zpool_close(zpool_handle); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { @@ -1879,7 +1882,7 @@ zfs_do_get(int argc, char **argv) flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", - "volume", "snapshot", "bookmark", + "volume", "snapshot", "snap", "bookmark", "all", NULL }; switch (getsubopt(&optarg, type_subopts, @@ -1891,12 +1894,13 @@ zfs_do_get(int argc, char **argv) types |= ZFS_TYPE_VOLUME; break; case 2: + case 3: types |= ZFS_TYPE_SNAPSHOT; break; - case 3: + case 4: types |= ZFS_TYPE_BOOKMARK; break; - case 4: + case 5: types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; break; @@ -1929,11 +1933,11 @@ zfs_do_get(int argc, char **argv) fields = argv[0]; /* - * Handle users who want to get all snapshots of the current - * dataset (ex. 'zfs get -t snapshot refer '). + * Handle users who want to get all snapshots or bookmarks + * of a dataset (ex. 'zfs get -t snapshot refer '). */ - if (types == ZFS_TYPE_SNAPSHOT && argc > 1 && - (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { + if ((types == ZFS_TYPE_SNAPSHOT || types == ZFS_TYPE_BOOKMARK) && + argc > 1 && (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { flags |= (ZFS_ITER_DEPTH_LIMIT | ZFS_ITER_RECURSE); limit = 1; } @@ -2238,7 +2242,7 @@ zfs_do_upgrade(int argc, char **argv) boolean_t showversions = B_FALSE; int ret = 0; upgrade_cbdata_t cb = { 0 }; - signed char c; + int c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ @@ -3433,11 +3437,11 @@ zfs_do_list(int argc, char **argv) types &= ~ZFS_TYPE_SNAPSHOT; /* - * Handle users who want to list all snapshots of the current - * dataset (ex. 'zfs list -t snapshot '). + * Handle users who want to list all snapshots or bookmarks + * of the current dataset (ex. 'zfs list -t snapshot '). 
*/ - if (types == ZFS_TYPE_SNAPSHOT && argc > 0 && - (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { + if ((types == ZFS_TYPE_SNAPSHOT || types == ZFS_TYPE_BOOKMARK) && + argc > 0 && (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { flags |= (ZFS_ITER_DEPTH_LIMIT | ZFS_ITER_RECURSE); limit = 1; } @@ -3932,7 +3936,7 @@ static int zfs_do_snapshot(int argc, char **argv) { int ret = 0; - signed char c; + int c; nvlist_t *props; snap_cbdata_t sd = { 0 }; boolean_t multiple_snaps = B_FALSE; @@ -4892,7 +4896,6 @@ parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) zfs_deleg_who_type_t perm_type = name[0]; char perm_locality = name[1]; const char *perm_name = name + 3; - boolean_t is_set = B_TRUE; who_perm_t *who_perm = NULL; assert('$' == name[2]); @@ -4922,57 +4925,56 @@ parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) assert(!"unhandled zfs_deleg_who_type_t"); } - if (is_set) { - who_perm_node_t *found_node = NULL; - who_perm_node_t *node = safe_malloc( - sizeof (who_perm_node_t)); - who_perm = &node->who_perm; - uu_avl_index_t idx = 0; - - uu_avl_node_init(node, &node->who_avl_node, avl_pool); - who_perm_init(who_perm, fsperm, perm_type, perm_name); - - if ((found_node = uu_avl_find(avl, node, NULL, &idx)) - == NULL) { - if (avl == fsperm->fsp_uge_avl) { - uid_t rid = 0; - struct passwd *p = NULL; - struct group *g = NULL; - const char *nice_name = NULL; - - switch (perm_type) { - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_USER: - rid = atoi(perm_name); - p = getpwuid(rid); - if (p) - nice_name = p->pw_name; - break; - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_GROUP: - rid = atoi(perm_name); - g = getgrgid(rid); - if (g) - nice_name = g->gr_name; - break; + who_perm_node_t *found_node = NULL; + who_perm_node_t *node = safe_malloc( + sizeof (who_perm_node_t)); + who_perm = &node->who_perm; + uu_avl_index_t idx = 0; - default: - break; - } + uu_avl_node_init(node, &node->who_avl_node, avl_pool); + who_perm_init(who_perm, fsperm, perm_type, perm_name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) { + if (avl == fsperm->fsp_uge_avl) { + uid_t rid = 0; + struct passwd *p = NULL; + struct group *g = NULL; + const char *nice_name = NULL; - if (nice_name != NULL) - (void) strlcpy( - node->who_perm.who_ug_name, - nice_name, 256); + switch (perm_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + rid = atoi(perm_name); + p = getpwuid(rid); + if (p) + nice_name = p->pw_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + rid = atoi(perm_name); + g = getgrgid(rid); + if (g) + nice_name = g->gr_name; + break; + + default: + break; } - uu_avl_insert(avl, node, idx); - } else { - node = found_node; - who_perm = &node->who_perm; + if (nice_name != NULL) + (void) strlcpy( + node->who_perm.who_ug_name, + nice_name, 256); } + + uu_avl_insert(avl, node, idx); + } else { + node = found_node; + who_perm = &node->who_perm; } - VERIFY3P(who_perm, !=, NULL); + + assert(who_perm != NULL); (void) parse_who_perm(who_perm, nvl2, perm_locality); } @@ -6445,8 +6447,25 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, return (1); } - if (zfs_mount(zhp, options, flags) != 0) + if (zfs_mount(zhp, options, flags) != 0) { + /* + * Check if a mount sneaked in after we checked + */ + if (!explicit && + libzfs_errno(g_zfs) == EZFS_MOUNTFAILED) { + usleep(10 * MILLISEC); + libzfs_mnttab_cache(g_zfs, B_FALSE); + + if (zfs_is_mounted(zhp, NULL)) { + (void) fprintf(stderr, gettext( + "Ignoring previous 'already " + "mounted' error for '%s'\n"), + zfs_get_name(zhp)); 
+ return (0); + } + } return (1); + } break; } @@ -6621,10 +6640,13 @@ share_mount(int op, int argc, char **argv) /* * libshare isn't mt-safe, so only do the operation in parallel - * if we're mounting. + * if we're mounting. Additionally, the key-loading option must + * be serialized so that we can prompt the user for their keys + * in a consistent manner. */ zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, - share_mount_one_cb, &share_mount_state, op == OP_MOUNT); + share_mount_one_cb, &share_mount_state, + op == OP_MOUNT && !(flags & MS_CRYPT)); ret = share_mount_state.sm_status; for (int i = 0; i < cb.cb_used; i++) @@ -6729,8 +6751,8 @@ unshare_unmount_compare(const void *larg, const void *rarg, void *unused) /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an - * absolute path, find the entry /proc/self/mounts, verify that its a - * ZFS filesystems, and unmount it appropriately. + * absolute path, find the entry /proc/self/mounts, verify that it's a + * ZFS filesystem, and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) @@ -7522,7 +7544,7 @@ zfs_do_channel_program(int argc, char **argv) } if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { - (void) fprintf(stderr, gettext("cannot open pool '%s'"), + (void) fprintf(stderr, gettext("cannot open pool '%s'\n"), poolname); if (fd != 0) (void) close(fd); @@ -7895,7 +7917,7 @@ zfs_do_change_key(int argc, char **argv) * 4) zfs project [-p id] [-r] [-s] * Set project ID and/or inherit flag on the file(s) or directories. * -p: Set the project ID as the given id. - * -r: Set on subdirectorie recursively. If not specify "-p" option, + * -r: Set on subdirectories recursively. If not specify "-p" option, * it will use top-level directory's project ID as the given id, * then set both project ID and inherit flag on all descendants * of the top-level directory. @@ -8149,7 +8171,7 @@ main(int argc, char **argv) return (zfs_do_version(argc, argv)); if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } diff --git a/cmd/zinject/translate.c b/cmd/zinject/translate.c index 700961b06a3c..d4795d07110f 100644 --- a/cmd/zinject/translate.c +++ b/cmd/zinject/translate.c @@ -176,7 +176,7 @@ object_from_path(const char *dataset, uint64_t object, zinject_record_t *record) } /* - * Intialize the range based on the type, level, and range given. + * Initialize the range based on the type, level, and range given. 
*/ static int initialize_range(err_type_t type, int level, char *range, @@ -310,7 +310,7 @@ translate_record(err_type_t type, const char *object, const char *range, ziprintf("raw object: %llu\n", record->zi_object); /* - * For the given object, intialize the range in bytes + * For the given object, initialize the range in bytes */ if (initialize_range(type, level, (char *)range, record) != 0) goto err; diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index cff7f861a2e3..1795bfd4506c 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -763,7 +763,7 @@ main(int argc, char **argv) uint32_t dvas = 0; if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 487fb5f84142..4859d2cd7da8 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -389,7 +389,7 @@ get_usage(zpool_help_t idx) "[ ...]\n")); case HELP_STATUS: return (gettext("\tstatus [-c [script1,script2,...]] " - "[-igLpPsvxD] [-T d|u] [pool] ... \n" + "[-igLpPstvxD] [-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" @@ -785,7 +785,7 @@ add_prop_list_default(const char *propname, char *propval, nvlist_t **props, * -P Display full path for vdev name. * * Adds the given vdevs to 'pool'. As with create, the bulk of this work is - * handled by get_vdev_spec(), which constructs the nvlist needed to pass to + * handled by make_root_vdev(), which constructs the nvlist needed to pass to * libzfs. */ int @@ -883,7 +883,7 @@ zpool_do_add(int argc, char **argv) } } - /* pass off to get_vdev_spec for processing */ + /* pass off to make_root_vdev for processing */ nvroot = make_root_vdev(zhp, props, force, !force, B_FALSE, dryrun, argc, argv); if (nvroot == NULL) { @@ -973,7 +973,7 @@ zpool_do_remove(int argc, char **argv) int i, ret = 0; zpool_handle_t *zhp = NULL; boolean_t stop = B_FALSE; - char c; + int c; boolean_t noop = B_FALSE; boolean_t parsable = B_FALSE; @@ -1232,9 +1232,9 @@ zpool_do_labelclear(int argc, char **argv) * -O Set fsproperty=value in the pool's root file system * * Creates the named pool according to the given vdev specification. The - * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once - * we get the nvlist back from get_vdev_spec(), we either print out the contents - * (if '-n' was specified), or pass it to libzfs to do the creation. + * bulk of the vdev processing is done in make_root_vdev() in zpool_vdev.c. + * Once we get the nvlist back from make_root_vdev(), we either print out the + * contents (if '-n' was specified), or pass it to libzfs to do the creation. 
*/ int zpool_do_create(int argc, char **argv) @@ -1388,7 +1388,7 @@ zpool_do_create(int argc, char **argv) goto errout; } - /* pass off to get_vdev_spec for bulk processing */ + /* pass off to make_root_vdev for bulk processing */ nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, argc - 1, argv + 1); if (nvroot == NULL) @@ -6111,9 +6111,8 @@ zpool_do_detach(int argc, char **argv) int ret; /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { + while ((c = getopt(argc, argv, "")) != -1) { switch (c) { - case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -6342,12 +6341,11 @@ zpool_do_online(int argc, char **argv) int flags = 0; /* check options */ - while ((c = getopt(argc, argv, "et")) != -1) { + while ((c = getopt(argc, argv, "e")) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; - case 't': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -8518,24 +8516,12 @@ typedef struct hist_cbdata { boolean_t internal; } hist_cbdata_t; -/* - * Print out the command history for a specific pool. - */ -static int -get_history_one(zpool_handle_t *zhp, void *data) +static void +print_history_records(nvlist_t *nvhis, hist_cbdata_t *cb) { - nvlist_t *nvhis; nvlist_t **records; uint_t numrecords; - int ret, i; - hist_cbdata_t *cb = (hist_cbdata_t *)data; - - cb->first = B_FALSE; - - (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); - - if ((ret = zpool_get_history(zhp, &nvhis)) != 0) - return (ret); + int i; verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); @@ -8639,8 +8625,32 @@ get_history_one(zpool_handle_t *zhp, void *data) (void) printf("]"); (void) printf("\n"); } +} + +/* + * Print out the command history for a specific pool. + */ +static int +get_history_one(zpool_handle_t *zhp, void *data) +{ + nvlist_t *nvhis; + int ret; + hist_cbdata_t *cb = (hist_cbdata_t *)data; + uint64_t off = 0; + boolean_t eof = B_FALSE; + + cb->first = B_FALSE; + + (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); + + while (!eof) { + if ((ret = zpool_get_history(zhp, &nvhis, &off, &eof)) != 0) + return (ret); + + print_history_records(nvhis, cb); + nvlist_free(nvhis); + } (void) printf("\n"); - nvlist_free(nvhis); return (ret); } @@ -9381,7 +9391,7 @@ main(int argc, char **argv) return (zpool_do_version(argc, argv)); if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "%s", libzfs_error_init(errno)); + (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 7ea9d742006d..527fca08b887 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -433,11 +433,12 @@ check_disk(const char *path, blkid_cache cache, int force, char *value = blkid_get_tag_value(cache, "TYPE", path); (void) fprintf(stderr, gettext("%s is in use and contains " "a %s filesystem.\n"), path, value ? value : "unknown"); + free(value); return (-1); } /* - * Expected to fail for non-EFI labled disks. Just check the device + * Expected to fail for non-EFI labeled disks. Just check the device * as given and do not attempt to detect and scan partitions. */ err = efi_alloc_and_read(fd, &vtoc); @@ -828,7 +829,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) rep.zprl_children = 1; rep.zprl_parity = 0; } else { - uint64_t vdev_size; + int64_t vdev_size; /* * This is a mirror or RAID-Z vdev. 
Go through and make @@ -858,12 +859,12 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) */ type = NULL; dontreport = 0; - vdev_size = -1ULL; + vdev_size = -1LL; for (c = 0; c < children; c++) { nvlist_t *cnv = child[c]; char *path; struct stat64 statbuf; - uint64_t size = -1ULL; + int64_t size = -1LL; char *childtype; int fd, err; @@ -954,7 +955,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) * (~16MB) then report an error. */ if (!dontreport && - (vdev_size != -1ULL && + (vdev_size != -1LL && (labs(size - vdev_size) > ZPOOL_FUZZ))) { if (ret != NULL) @@ -1866,7 +1867,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, } /* - * Validate each device to make sure that its not shared with another + * Validate each device to make sure that it's not shared with another * subsystem. We do this even if 'force' is set, because there are some * uses (such as a dedicated dump device) that even '-f' cannot * override. diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index ed88729b5182..6b960c20f70f 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -53,7 +53,6 @@ */ #define DUMP_GROUPING 4 -uint64_t total_write_size = 0; uint64_t total_stream_len = 0; FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; @@ -198,7 +197,7 @@ print_block(char *buf, int length) } /* - * Print an array of bytes to stdout as hexidecimal characters. str must + * Print an array of bytes to stdout as hexadecimal characters. str must * have buf_len * 2 + 1 bytes of space. */ static void @@ -219,6 +218,9 @@ main(int argc, char *argv[]) { char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + uint64_t total_payload_size = 0; + uint64_t total_overhead_size = 0; + uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 }; char salt[ZIO_DATA_SALT_LEN * 2 + 1]; char iv[ZIO_DATA_IV_LEN * 2 + 1]; char mac[ZIO_DATA_MAC_LEN * 2 + 1]; @@ -237,7 +239,7 @@ main(int argc, char *argv[]) struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; - char c; + int c; boolean_t verbose = B_FALSE; boolean_t very_verbose = B_FALSE; boolean_t first = B_TRUE; @@ -336,7 +338,9 @@ main(int argc, char *argv[]) } drr_record_count[drr->drr_type]++; + total_overhead_size += sizeof (*drr); total_records++; + payload_size = 0; switch (drr->drr_type) { case DRR_BEGIN: @@ -390,6 +394,7 @@ main(int argc, char *argv[]) nvlist_print(stdout, nv); nvlist_free(nv); } + payload_size = sz; } break; @@ -554,7 +559,6 @@ main(int argc, char *argv[]) if (dump) { print_block(buf, payload_size); } - total_write_size += payload_size; break; case DRR_WRITE_BYREF: @@ -683,6 +687,7 @@ main(int argc, char *argv[]) print_block(buf, P2ROUNDUP(drrwe->drr_psize, 8)); } + payload_size = P2ROUNDUP(drrwe->drr_psize, 8); break; case DRR_OBJECT_RANGE: if (do_byteswap) { @@ -723,6 +728,8 @@ main(int argc, char *argv[]) (longlong_t)drrc->drr_checksum.zc_word[3]); } pcksum = zc; + drr_byte_count[drr->drr_type] += payload_size; + total_payload_size += payload_size; } free(buf); fletcher_4_fini(); @@ -730,28 +737,40 @@ main(int argc, char *argv[]) /* Print final summary */ (void) printf("SUMMARY:\n"); - (void) printf("\tTotal DRR_BEGIN records = %lld\n", - (u_longlong_t)drr_record_count[DRR_BEGIN]); - (void) printf("\tTotal DRR_END records = %lld\n", - (u_longlong_t)drr_record_count[DRR_END]); - (void) printf("\tTotal DRR_OBJECT 
records = %lld\n", - (u_longlong_t)drr_record_count[DRR_OBJECT]); - (void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); - (void) printf("\tTotal DRR_WRITE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE]); - (void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]); - (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]); - (void) printf("\tTotal DRR_FREE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREE]); - (void) printf("\tTotal DRR_SPILL records = %lld\n", - (u_longlong_t)drr_record_count[DRR_SPILL]); + (void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_BEGIN], + (u_longlong_t)drr_byte_count[DRR_BEGIN]); + (void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_END], + (u_longlong_t)drr_byte_count[DRR_END]); + (void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_OBJECT], + (u_longlong_t)drr_byte_count[DRR_OBJECT]); + (void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREEOBJECTS], + (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]); + (void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE], + (u_longlong_t)drr_byte_count[DRR_WRITE]); + (void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE_BYREF], + (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]); + (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu " + "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED], + (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]); + (void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREE], + (u_longlong_t)drr_byte_count[DRR_FREE]); + (void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_SPILL], + (u_longlong_t)drr_byte_count[DRR_SPILL]); (void) printf("\tTotal records = %lld\n", (u_longlong_t)total_records); - (void) printf("\tTotal write size = %lld (0x%llx)\n", - (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); + (void) printf("\tTotal payload size = %lld (0x%llx)\n", + (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size); + (void) printf("\tTotal header overhead = %lld (0x%llx)\n", + (u_longlong_t)total_overhead_size, + (u_longlong_t)total_overhead_size); (void) printf("\tTotal stream length = %lld (0x%llx)\n", (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); return (0); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 341b4a44d9ae..6bb023a5da57 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -74,7 +74,7 @@ * * To turn this into an overnight stress test, use -T to specify run time. * - * You can ask more more vdevs [-v], datasets [-d], or threads [-t] + * You can ask more vdevs [-v], datasets [-d], or threads [-t] * to increase the pool capacity, fanout, and overall stress level. * * Use the -k option to set the desired frequency of kills. 
@@ -107,6 +107,7 @@
 #include
 #include
 #include
+#include <sys/vdev_raidz.h>
 #include
 #include
 #include
@@ -1692,7 +1693,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
 	if (zil_replaying(zd->zd_zilog, tx))
 		return;

-	if (lr->lr_length > ZIL_MAX_LOG_DATA)
+	if (lr->lr_length > zil_max_log_data(zd->zd_zilog))
 		write_state = WR_INDIRECT;

 	itx = zil_itx_create(TX_WRITE,
@@ -2745,8 +2746,24 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
 	VERIFY3U(EEXIST, ==,
 	    spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
 	nvlist_free(nvroot);
+
+	/*
+	 * We open a reference to the spa and then we try to export it
+	 * expecting one of the following errors:
+	 *
+	 * EBUSY
+	 *	Because of the reference we just opened.
+	 *
+	 * ZFS_ERR_EXPORT_IN_PROGRESS
+	 *	For the case that there is another ztest thread doing
+	 *	an export concurrently.
+	 */
 	VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
-	VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
+	int error = spa_destroy(zo->zo_pool);
+	if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) {
+		fatal(0, "spa_destroy(%s) returned unexpected value %d",
+		    spa->spa_name, error);
+	}
 	spa_close(spa, FTAG);

 	(void) pthread_rwlock_unlock(&ztest_name_lock);
@@ -7094,6 +7111,8 @@ ztest_run(ztest_shared_t *zs)
 	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;

+	VERIFY0(vdev_raidz_impl_set("cycle"));
+
 	dmu_objset_stats_t dds;
 	VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool,
 	    DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
diff --git a/cmd/zvol_wait/Makefile.am b/cmd/zvol_wait/Makefile.am
new file mode 100644
index 000000000000..564031c9799d
--- /dev/null
+++ b/cmd/zvol_wait/Makefile.am
@@ -0,0 +1 @@
+dist_bin_SCRIPTS = zvol_wait
diff --git a/cmd/zvol_wait/zvol_wait b/cmd/zvol_wait/zvol_wait
new file mode 100755
index 000000000000..e5df82dd376a
--- /dev/null
+++ b/cmd/zvol_wait/zvol_wait
@@ -0,0 +1,112 @@
+#!/bin/sh
+
+count_zvols() {
+	if [ -z "$zvols" ]; then
+		echo 0
+	else
+		echo "$zvols" | wc -l
+	fi
+}
+
+filter_out_zvols_with_links() {
+	while read -r zvol; do
+		if [ ! -L "/dev/zvol/$zvol" ]; then
+			echo "$zvol"
+		fi
+	done
+}
+
+filter_out_deleted_zvols() {
+	while read -r zvol; do
+		if zfs list "$zvol" >/dev/null 2>&1; then
+			echo "$zvol"
+		fi
+	done
+}
+
+list_zvols() {
+	zfs list -t volume -H -o name,volmode,receive_resume_token |
+	    while read -r zvol_line; do
+		name=$(echo "$zvol_line" | awk '{print $1}')
+		volmode=$(echo "$zvol_line" | awk '{print $2}')
+		token=$(echo "$zvol_line" | awk '{print $3}')
+		#
+		# /dev links are not created for zvols with volmode = "none".
+		#
+		[ "$volmode" = "none" ] && continue
+		#
+		# We also ignore partially received zvols if it is
+		# not an incremental receive, as those won't even have a block
+		# device minor node created yet.
+		#
+		if [ "$token" != "-" ]; then
+			#
+			# Incremental receives create an invisible clone that
+			# is not automatically displayed by zfs list.
+			#
+			if ! zfs list "$name/%recv" >/dev/null 2>&1; then
+				continue
+			fi
+		fi
+		echo "$name"
+	done
+}
+
+zvols=$(list_zvols)
+zvols_count=$(count_zvols)
+if [ "$zvols_count" -eq 0 ]; then
+	echo "No zvols found, nothing to do."
+ exit 0 +fi + +echo "Testing $zvols_count zvol links" + +outer_loop=0 +while [ "$outer_loop" -lt 20 ]; do + outer_loop=$((outer_loop + 1)) + + old_zvols_count=$(count_zvols) + + inner_loop=0 + while [ "$inner_loop" -lt 30 ]; do + inner_loop=$((inner_loop + 1)) + + zvols="$(echo "$zvols" | filter_out_zvols_with_links)" + + zvols_count=$(count_zvols) + if [ "$zvols_count" -eq 0 ]; then + echo "All zvol links are now present." + exit 0 + fi + sleep 1 + done + + echo "Still waiting on $zvols_count zvol links ..." + # + # Although zvols should normally not be deleted at boot time, + # if that is the case then their links will be missing and + # we would stall. + # + if [ "$old_zvols_count" -eq "$zvols_count" ]; then + echo "No progress since last loop." + echo "Checking if any zvols were deleted." + + zvols=$(echo "$zvols" | filter_out_deleted_zvols) + zvols_count=$(count_zvols) + + if [ "$old_zvols_count" -ne "$zvols_count" ]; then + echo "$((old_zvols_count - zvols_count)) zvol(s) deleted." + fi + + if [ "$zvols_count" -ne 0 ]; then + echo "Remaining zvols:" + echo "$zvols" + else + echo "All zvol links are now present." + exit 0 + fi + fi +done + +echo "Timed out waiting on zvol links" +exit 1 diff --git a/config/always-python.m4 b/config/always-python.m4 index 858ab7b01582..c1c07597e688 100644 --- a/config/always-python.m4 +++ b/config/always-python.m4 @@ -1,36 +1,3 @@ -dnl # -dnl # ZFS_AC_PYTHON_VERSION(version, [action-if-true], [action-if-false]) -dnl # -dnl # Verify Python version -dnl # -AC_DEFUN([ZFS_AC_PYTHON_VERSION], [ - ver_check=`$PYTHON -c "import sys; print (sys.version.split()[[0]] $1)"` - AS_IF([test "$ver_check" = "True"], [ - m4_ifvaln([$2], [$2]) - ], [ - m4_ifvaln([$3], [$3]) - ]) -]) - -dnl # -dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) -dnl # -dnl # Checks for Python module. Freely inspired by AX_PYTHON_MODULE -dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html -dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS. -dnl # -AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ - PYTHON_NAME=`basename $PYTHON` - AC_MSG_CHECKING([for $PYTHON_NAME module: $1]) - AS_IF([$PYTHON -c "import $1" 2>/dev/null], [ - AC_MSG_RESULT(yes) - m4_ifvaln([$2], [$2]) - ], [ - AC_MSG_RESULT(no) - m4_ifvaln([$3], [$3]) - ]) -]) - dnl # dnl # The majority of the python scripts are written to be compatible dnl # with Python 2.6 and Python 3.4. 
Therefore, they may be installed @@ -46,50 +13,47 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ [with_python=check]) AS_CASE([$with_python], - [check], - [AS_IF([test -x /usr/bin/python3], - [PYTHON="python3"], - [AS_IF([test -x /usr/bin/python2], - [PYTHON="python2"], - [PYTHON=""] - )] - )], + [check], [AC_CHECK_PROGS([PYTHON], [python3 python2], [:])], [2*], [PYTHON="python${with_python}"], [*python2*], [PYTHON="${with_python}"], [3*], [PYTHON="python${with_python}"], [*python3*], [PYTHON="${with_python}"], - [no], [PYTHON=""], + [no], [PYTHON=":"], [AC_MSG_ERROR([Unknown --with-python value '$with_python'])] ) - AS_IF([$PYTHON --version >/dev/null 2>&1], [ /bin/true ], [ - AC_MSG_ERROR([Cannot find $PYTHON in your system path]) - ]) - - AM_PATH_PYTHON([2.6], [], [:]) - AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) - AM_CONDITIONAL([USING_PYTHON_2], [test "${PYTHON_VERSION:0:2}" = "2."]) - AM_CONDITIONAL([USING_PYTHON_3], [test "${PYTHON_VERSION:0:2}" = "3."]) - dnl # dnl # Minimum supported Python versions for utilities: - dnl # Python 2.6.x, or Python 3.4.x + dnl # Python 2.6 or Python 3.4 dnl # - AS_IF([test "${PYTHON_VERSION:0:2}" = "2."], [ - ZFS_AC_PYTHON_VERSION([>= '2.6'], [ /bin/true ], - [AC_MSG_ERROR("Python >= 2.6.x is not available")]) + AM_PATH_PYTHON([], [], [:]) + AS_IF([test -z "$PYTHON_VERSION"], [ + PYTHON_VERSION=$(basename $PYTHON | tr -cd 0-9.) ]) + PYTHON_MINOR=${PYTHON_VERSION#*\.} - AS_IF([test "${PYTHON_VERSION:0:2}" = "3."], [ - ZFS_AC_PYTHON_VERSION([>= '3.4'], [ /bin/true ], - [AC_MSG_ERROR("Python >= 3.4.x is not available")]) - ]) + AS_CASE([$PYTHON_VERSION], + [2.*], [ + AS_IF([test $PYTHON_MINOR -lt 6], + [AC_MSG_ERROR("Python >= 2.6 is required")]) + ], + [3.*], [ + AS_IF([test $PYTHON_MINOR -lt 4], + [AC_MSG_ERROR("Python >= 3.4 is required")]) + ], + [:|2|3], [], + [PYTHON_VERSION=3] + ) + + AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) + AM_CONDITIONAL([USING_PYTHON_2], [test "x${PYTHON_VERSION%%\.*}" = x2]) + AM_CONDITIONAL([USING_PYTHON_3], [test "x${PYTHON_VERSION%%\.*}" = x3]) dnl # dnl # Request that packages be built for a specific Python version. dnl # - AS_IF([test $with_python != check], [ - PYTHON_PKG_VERSION=`echo ${PYTHON} | tr -d 'a-zA-Z.'` + AS_IF([test "x$with_python" != xcheck], [ + PYTHON_PKG_VERSION=$(echo $PYTHON_VERSION | tr -d .) DEFINE_PYTHON_PKG_VERSION='--define "__use_python_pkg_version '${PYTHON_PKG_VERSION}'"' DEFINE_PYTHON_VERSION='--define "__use_python '${PYTHON}'"' ], [ diff --git a/config/always-pyzfs.m4 b/config/always-pyzfs.m4 index d74d6f1a756a..f620a8f9a18b 100644 --- a/config/always-pyzfs.m4 +++ b/config/always-pyzfs.m4 @@ -1,5 +1,24 @@ dnl # -dnl # Determines if pyzfs can be built, requires Python 2.7 or latter. +dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) +dnl # +dnl # Checks for Python module. Freely inspired by AX_PYTHON_MODULE +dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html +dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS. +dnl # +AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ + PYTHON_NAME=$(basename $PYTHON) + AC_MSG_CHECKING([for $PYTHON_NAME module: $1]) + AS_IF([$PYTHON -c "import $1" 2>/dev/null], [ + AC_MSG_RESULT(yes) + m4_ifvaln([$2], [$2]) + ], [ + AC_MSG_RESULT(no) + m4_ifvaln([$3], [$3]) + ]) +]) + +dnl # +dnl # Determines if pyzfs can be built, requires Python 2.7 or later. 
dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ AC_ARG_ENABLE([pyzfs], @@ -18,7 +37,12 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ DEFINE_PYZFS='--without pyzfs' ]) ], [ - DEFINE_PYZFS='' + AS_IF([test "$PYTHON" != :], [ + DEFINE_PYZFS='' + ], [ + enable_pyzfs=no + DEFINE_PYZFS='--without pyzfs' + ]) ]) AC_SUBST(DEFINE_PYZFS) @@ -26,20 +50,16 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # Require python-devel libraries dnl # AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [ - AS_IF([test "${PYTHON_VERSION:0:2}" = "2."], [ - PYTHON_REQUIRED_VERSION=">= '2.7.0'" - ], [ - AS_IF([test "${PYTHON_VERSION:0:2}" = "3."], [ - PYTHON_REQUIRED_VERSION=">= '3.4.0'" - ], [ - AC_MSG_ERROR("Python $PYTHON_VERSION unknown") - ]) - ]) + AS_CASE([$PYTHON_VERSION], + [3.*], [PYTHON_REQUIRED_VERSION=">= '3.4.0'"], + [2.*], [PYTHON_REQUIRED_VERSION=">= '2.7.0'"], + [AC_MSG_ERROR("Python $PYTHON_VERSION unknown")] + ) AX_PYTHON_DEVEL([$PYTHON_REQUIRED_VERSION], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_REQUIRED_VERSION development library is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -52,7 +72,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ZFS_AC_PYTHON_MODULE([setuptools], [], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_VERSION setuptools is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -65,7 +85,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ZFS_AC_PYTHON_MODULE([cffi], [], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_VERSION cffi is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -76,7 +96,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # AS_IF([test "x$enable_pyzfs" = xcheck], [enable_pyzfs=yes]) - AM_CONDITIONAL([PYZFS_ENABLED], [test x$enable_pyzfs = xyes]) + AM_CONDITIONAL([PYZFS_ENABLED], [test "x$enable_pyzfs" = xyes]) AC_SUBST([PYZFS_ENABLED], [$enable_pyzfs]) AC_SUBST(pythonsitedir, [$PYTHON_SITE_PKG]) diff --git a/config/ax_code_coverage.m4 b/config/ax_code_coverage.m4 index 4417d4444a96..5cdfe14562aa 100644 --- a/config/ax_code_coverage.m4 +++ b/config/ax_code_coverage.m4 @@ -50,7 +50,7 @@ # CODE_COVERAGE_LIBS is preferred for clarity; CODE_COVERAGE_LDFLAGS is # deprecated. They have the same value. # -# This code was derived from Makefile.decl in GLib, originally licenced +# This code was derived from Makefile.decl in GLib, originally licensed # under LGPLv2.1+. 
# # LICENSE diff --git a/config/deb.am b/config/deb.am index e405547aa949..83059a923493 100644 --- a/config/deb.am +++ b/config/deb.am @@ -20,7 +20,7 @@ deb-kmod: deb-local rpm-kmod arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \ debarch=`$(DPKG) --print-architecture`; \ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \ - fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1; \ + fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \ $(RM) $$pkg1 @@ -30,7 +30,7 @@ deb-dkms: deb-local rpm-dkms arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \ debarch=`$(DPKG) --print-architecture`; \ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \ - fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1; \ + fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \ $(RM) $$pkg1 deb-utils: deb-local rpm-utils @@ -45,7 +45,7 @@ deb-utils: deb-local rpm-utils pkg5=libzpool2-$${version}.$${arch}.rpm; \ pkg6=libzfs2-devel-$${version}.$${arch}.rpm; \ pkg7=$${name}-test-$${version}.$${arch}.rpm; \ - pkg8=$${name}-dracut-$${version}.$${arch}.rpm; \ + pkg8=$${name}-dracut-$${version}.noarch.rpm; \ pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \ pkg10=`ls python*-pyzfs-$${version}* | tail -1`; \ ## Arguments need to be passed to dh_shlibdeps. Alien provides no mechanism @@ -63,7 +63,7 @@ deb-utils: deb-local rpm-utils env PATH=$${path_prepend}:$${PATH} \ fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch \ $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ - $$pkg8 $$pkg9 $$pkg10; \ + $$pkg8 $$pkg9 $$pkg10 || exit 1; \ $(RM) $${path_prepend}/dh_shlibdeps; \ rmdir $${path_prepend}; \ $(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ diff --git a/config/iconv.m4 b/config/iconv.m4 index a285e9daa5ea..fc915fde6c56 100644 --- a/config/iconv.m4 +++ b/config/iconv.m4 @@ -269,8 +269,7 @@ size_t iconv(); [am_cv_proto_iconv_arg1="const"]) am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"]) am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'` - AC_MSG_RESULT([ - $am_cv_proto_iconv]) + AC_MSG_RESULT([$am_cv_proto_iconv]) else dnl When compiling GNU libiconv on a system that does not have iconv yet, dnl pick the POSIX compliant declaration without 'const'. 
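The config/kernel-*.m4 hunks from here on all apply the same mechanical conversion: each old one-shot ZFS_LINUX_TRY_COMPILE check is split into a ZFS_AC_KERNEL_SRC_* macro, which registers a conftest program via ZFS_LINUX_TEST_SRC, and a matching ZFS_AC_KERNEL_* macro, which only inspects the result of the batched compile via ZFS_LINUX_TEST_RESULT (the batch itself is driven by ZFS_LINUX_TEST_COMPILE_ALL, visible in the new config/kernel-config-defined.m4 further below). A minimal sketch of the converted shape, assuming the ZFS_LINUX_TEST_* macros behave as used in these hunks; the names example_feature, ZFS_AC_KERNEL_SRC_EXAMPLE, and HAVE_EXAMPLE_FEATURE are illustrative placeholders, not macros from this patch:

dnl #
dnl # Phase 1: register the test program. It is compiled later, in one
dnl # batch with every other registered check, not here.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_EXAMPLE], [
	ZFS_LINUX_TEST_SRC([example_feature], [
		#include <linux/fs.h>
	],[
		/* reference the function, field, or flag under test */
	])
])

dnl #
dnl # Phase 2: consume the cached compile result; no compilation here.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_EXAMPLE], [
	AC_MSG_CHECKING([whether the example feature is available])
	ZFS_LINUX_TEST_RESULT([example_feature], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_EXAMPLE_FEATURE, 1,
		    [example feature is available])
	],[
		AC_MSG_RESULT(no)
	])
])

License-sensitive checks additionally pass [$ZFS_META_LICENSE] to ZFS_LINUX_TEST_SRC (as the kernel-acl.m4 hunk does), and checks that must confirm a symbol is exported use ZFS_LINUX_TEST_RESULT_SYMBOL in place of ZFS_LINUX_TEST_RESULT.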
diff --git a/config/kernel-access-ok-type.m4 b/config/kernel-access-ok-type.m4 index 3b2878a55cb3..dc9433458703 100644 --- a/config/kernel-access-ok-type.m4 +++ b/config/kernel-access-ok-type.m4 @@ -4,17 +4,23 @@ dnl # dnl # - access_ok(type, addr, size) dnl # + access_ok(addr, size) dnl # -AC_DEFUN([ZFS_AC_KERNEL_ACCESS_OK_TYPE], [ - AC_MSG_CHECKING([whether access_ok() has 'type' parameter]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE], [ + ZFS_LINUX_TEST_SRC([access_ok_type], [ #include ],[ - const void __user __attribute__((unused)) *addr = (void *) 0xdeadbeef; + const void __user __attribute__((unused)) *addr = + (void *) 0xdeadbeef; unsigned long __attribute__((unused)) size = 1; int error __attribute__((unused)) = access_ok(0, addr, size); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_ACCESS_OK_TYPE], [ + AC_MSG_CHECKING([whether access_ok() has 'type' parameter]) + ZFS_LINUX_TEST_RESULT([access_ok_type], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ACCESS_OK_TYPE, 1, [kernel has access_ok with 'type' parameter]) + AC_DEFINE(HAVE_ACCESS_OK_TYPE, 1, + [kernel has access_ok with 'type' parameter]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4 index 02cc020e5c90..68a72872d815 100644 --- a/config/kernel-acl.m4 +++ b/config/kernel-acl.m4 @@ -3,32 +3,26 @@ dnl # Check if posix_acl_release can be used from a ZFS_META_LICENSED dnl # module. The is_owner_or_cap macro was replaced by dnl # inode_owner_or_capable dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [ - AC_MSG_CHECKING([whether posix_acl_release() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE], [ + ZFS_LINUX_TEST_SRC([posix_acl_release], [ #include #include #include - ],[ - struct posix_acl* tmp = posix_acl_alloc(1, 0); + ], [ + struct posix_acl *tmp = posix_acl_alloc(1, 0); posix_acl_release(tmp); - ],[ + ], [], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [ + AC_MSG_CHECKING([whether posix_acl_release() is available]) + ZFS_LINUX_TEST_RESULT([posix_acl_release], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_RELEASE, 1, [posix_acl_release() is available]) AC_MSG_CHECKING([whether posix_acl_release() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct posix_acl* tmp = posix_acl_alloc(1, 0); - posix_acl_release(tmp); - ],[ + ZFS_LINUX_TEST_RESULT([posix_acl_release_license], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) @@ -46,24 +40,25 @@ dnl # set_cached_acl() and forget_cached_acl() changed from inline to dnl # EXPORT_SYMBOL. In the former case, they may not be usable because of dnl # posix_acl_release. In the latter case, we can always use them. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [ - AC_MSG_CHECKING([whether set_cached_acl() is usable]) - ZFS_LINUX_TRY_COMPILE([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE], [ + ZFS_LINUX_TEST_SRC([set_cached_acl], [ #include #include #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ + ], [ struct inode *ip = NULL; struct posix_acl *acl = posix_acl_alloc(1, 0); set_cached_acl(ip, ACL_TYPE_ACCESS, acl); forget_cached_acl(ip, ACL_TYPE_ACCESS); - ],[ + ], [], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [ + AC_MSG_CHECKING([whether set_cached_acl() is usable]) + ZFS_LINUX_TEST_RESULT([set_cached_acl_license], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SET_CACHED_ACL_USABLE, 1, - [posix_acl_release() is usable]) + [set_cached_acl() is usable]) ],[ AC_MSG_RESULT(no) ]) @@ -77,14 +72,25 @@ dnl # dnl # 3.14 API change, dnl # posix_acl_chmod() is changed to __posix_acl_chmod() dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ - AC_MSG_CHECKING([whether posix_acl_chmod exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD], [ + ZFS_LINUX_TEST_SRC([posix_acl_chmod], [ #include #include ],[ posix_acl_chmod(NULL, 0, 0) + ]) + + ZFS_LINUX_TEST_SRC([__posix_acl_chmod], [ + #include + #include ],[ + __posix_acl_chmod(NULL, 0, 0) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ + AC_MSG_CHECKING([whether posix_acl_chmod exists]) + ZFS_LINUX_TEST_RESULT([posix_acl_chmod], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1, [posix_acl_chmod() exists]) ],[ @@ -92,14 +98,10 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ ]) AC_MSG_CHECKING([whether __posix_acl_chmod exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - __posix_acl_chmod(NULL, 0, 0) - ],[ + ZFS_LINUX_TEST_RESULT([__posix_acl_chmod], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE___POSIX_ACL_CHMOD, 1, [__posix_acl_chmod() exists]) + AC_DEFINE(HAVE___POSIX_ACL_CHMOD, 1, + [__posix_acl_chmod() exists]) ],[ AC_MSG_RESULT(no) ]) @@ -109,18 +111,22 @@ dnl # dnl # 3.1 API change, dnl # posix_acl_equiv_mode now wants an umode_t* instead of a mode_t* dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ - AC_MSG_CHECKING([whether posix_acl_equiv_mode() wants umode_t]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ + ZFS_LINUX_TEST_SRC([posix_acl_equiv_mode], [ #include #include ],[ umode_t tmp; posix_acl_equiv_mode(NULL,&tmp); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ + AC_MSG_CHECKING([whether posix_acl_equiv_mode() wants umode_t]) + ZFS_LINUX_TEST_RESULT([posix_acl_equiv_mode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T, 1, - [ posix_acl_equiv_mode wants umode_t*]) + [posix_acl_equiv_mode wants umode_t*]) ],[ AC_MSG_RESULT(no) ]) @@ -130,9 +136,8 @@ dnl # dnl # 4.8 API change, dnl # The function posix_acl_valid now must be passed a namespace. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ - AC_MSG_CHECKING([whether posix_acl_valid() wants user namespace]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS], [ + ZFS_LINUX_TEST_SRC([posix_acl_valid_with_ns], [ #include #include ],[ @@ -141,7 +146,12 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ int error; error = posix_acl_valid(user_ns, acl); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ + AC_MSG_CHECKING([whether posix_acl_valid() wants user namespace]) + ZFS_LINUX_TEST_RESULT([posix_acl_valid_with_ns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_VALID_WITH_NS, 1, [posix_acl_valid() wants user namespace]) @@ -155,9 +165,8 @@ dnl # 2.6.27 API change, dnl # Check if inode_operations contains the function permission dnl # and expects the nameidata structure to have been removed. dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [ - AC_MSG_CHECKING([whether iops->permission() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION], [ + ZFS_LINUX_TEST_SRC([inode_operations_permission], [ #include int permission_fn(struct inode *inode, int mask) { return 0; } @@ -166,8 +175,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [ iops __attribute__ ((unused)) = { .permission = permission_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [ + AC_MSG_CHECKING([whether iops->permission() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_permission], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PERMISSION, 1, [iops->permission() exists]) ],[ @@ -180,9 +193,8 @@ dnl # 2.6.26 API change, dnl # Check if inode_operations contains the function permission dnl # and expects the nameidata structure to be passed. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ - AC_MSG_CHECKING([whether iops->permission() wants nameidata]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ + ZFS_LINUX_TEST_SRC([inode_operations_permission_with_nameidata], [ #include #include @@ -193,8 +205,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ iops __attribute__ ((unused)) = { .permission = permission_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [ + AC_MSG_CHECKING([whether iops->permission() wants nameidata]) + ZFS_LINUX_TEST_RESULT([inode_operations_permission_with_nameidata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PERMISSION, 1, [iops->permission() exists]) AC_DEFINE(HAVE_PERMISSION_WITH_NAMEIDATA, 1, @@ -208,9 +224,8 @@ dnl # dnl # 2.6.32 API change, dnl # Check if inode_operations contains the function check_acl dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [ - AC_MSG_CHECKING([whether iops->check_acl() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_check_acl], [ #include int check_acl_fn(struct inode *inode, int mask) { return 0; } @@ -219,8 +234,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [ iops __attribute__ ((unused)) = { .check_acl = check_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [ + AC_MSG_CHECKING([whether iops->check_acl() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_check_acl], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CHECK_ACL, 1, [iops->check_acl() exists]) ],[ @@ -232,9 +251,8 @@ dnl # dnl # 2.6.38 API change, dnl # The function check_acl gained a new parameter: flags dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ - AC_MSG_CHECKING([whether iops->check_acl() wants flags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ + ZFS_LINUX_TEST_SRC([inode_operations_check_acl_with_flags], [ #include int check_acl_fn(struct inode *inode, int mask, @@ -244,8 +262,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ iops __attribute__ ((unused)) = { .check_acl = check_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [ + AC_MSG_CHECKING([whether iops->check_acl() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_check_acl_with_flags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CHECK_ACL, 1, [iops->check_acl() exists]) AC_DEFINE(HAVE_CHECK_ACL_WITH_FLAGS, 1, @@ -259,9 +281,8 @@ dnl # dnl # 3.1 API change, dnl # Check if inode_operations contains the function get_acl dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ - AC_MSG_CHECKING([whether iops->get_acl() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_get_acl], [ #include struct posix_acl *get_acl_fn(struct inode *inode, int type) @@ -271,8 +292,12 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ iops __attribute__ ((unused)) = { .get_acl = get_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ + AC_MSG_CHECKING([whether iops->get_acl() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_get_acl], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GET_ACL, 1, [iops->get_acl() exists]) ],[ @@ -284,20 +309,23 @@ dnl # dnl # 3.14 API change, dnl # Check if inode_operations 
contains the function set_acl dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ - AC_MSG_CHECKING([whether iops->set_acl() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ + ZFS_LINUX_TEST_SRC([inode_operations_set_acl], [ #include - int set_acl_fn(struct inode *inode, struct posix_acl *acl, int type) - { return 0; } + int set_acl_fn(struct inode *inode, struct posix_acl *acl, + int type) { return 0; } static const struct inode_operations iops __attribute__ ((unused)) = { .set_acl = set_acl_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ + AC_MSG_CHECKING([whether iops->set_acl() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) ],[ @@ -311,16 +339,79 @@ dnl # The kernel get_acl will now check cache before calling i_op->get_acl and dnl # do set_cached_acl after that, so i_op->get_acl don't need to do that dnl # anymore. dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE], [ + ZFS_LINUX_TEST_SRC([get_acl_handle_cache], [ + #include + ],[ + void *sentinel __attribute__ ((unused)) = + uncached_acl_sentinel(NULL); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE], [ AC_MSG_CHECKING([whether uncached_acl_sentinel() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include + ZFS_LINUX_TEST_RESULT([get_acl_handle_cache], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_GET_ACL_HANDLE_CACHE, 1, + [uncached_acl_sentinel() exists]) ],[ - void *sentinel __attribute__ ((unused)) = uncached_acl_sentinel(NULL); + AC_MSG_RESULT(no) + ]) +]) + +dnl # +dnl # 4.16 kernel: check if struct posix_acl acl.a_refcount is a refcount_t. +dnl # It's an atomic_t on older kernels. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT], [ + ZFS_LINUX_TEST_SRC([acl_refcount], [ + #include + #include + #include ],[ + struct posix_acl acl; + refcount_t *r __attribute__ ((unused)) = &acl.a_refcount; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_ACL_HAS_REFCOUNT], [ + AC_MSG_CHECKING([whether posix_acl has refcount_t]) + ZFS_LINUX_TEST_RESULT([acl_refcount], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_GET_ACL_HANDLE_CACHE, 1, [uncached_acl_sentinel() exists]) + AC_DEFINE(HAVE_ACL_REFCOUNT, 1, [posix_acl has refcount_t]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL], [ + ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE + ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE + ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD + ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T + ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL + ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL + ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE + ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT +]) + +AC_DEFUN([ZFS_AC_KERNEL_ACL], [ + ZFS_AC_KERNEL_POSIX_ACL_RELEASE + ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE + ZFS_AC_KERNEL_POSIX_ACL_CHMOD + ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T + ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS + ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION + ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA + ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL + ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS + ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL + ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL + ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE + ZFS_AC_KERNEL_ACL_HAS_REFCOUNT +]) diff 
--git a/config/kernel-aio-fsync.m4 b/config/kernel-aio-fsync.m4 index 41b7a98a6b06..b4dbf29ba781 100644 --- a/config/kernel-aio-fsync.m4 +++ b/config/kernel-aio-fsync.m4 @@ -1,21 +1,23 @@ dnl # dnl # Linux 4.9-rc5+ ABI, removal of the .aio_fsync field dnl # -AC_DEFUN([ZFS_AC_KERNEL_AIO_FSYNC], [ - AC_MSG_CHECKING([whether fops->aio_fsync() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_AIO_FSYNC], [ + ZFS_LINUX_TEST_SRC([aio_fsync], [ #include static const struct file_operations fops __attribute__ ((unused)) = { .aio_fsync = NULL, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_AIO_FSYNC], [ + AC_MSG_CHECKING([whether fops->aio_fsync() exists]) + ZFS_LINUX_TEST_RESULT([aio_fsync], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FILE_AIO_FSYNC, 1, [fops->aio_fsync() exists]) ],[ AC_MSG_RESULT(no) ]) ]) - diff --git a/config/kernel-automount.m4 b/config/kernel-automount.m4 index 1ee4c168d43e..93e14fa8d634 100644 --- a/config/kernel-automount.m4 +++ b/config/kernel-automount.m4 @@ -5,16 +5,19 @@ dnl # solution to handling automounts. Prior to this cifs/nfs clients dnl # which required automount support would abuse the follow_link() dnl # operation on directories for this purpose. dnl # -AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ - AC_MSG_CHECKING([whether dops->d_automount() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ + ZFS_LINUX_TEST_SRC([dentry_operations_d_automount], [ #include struct vfsmount *d_automount(struct path *p) { return NULL; } struct dentry_operations dops __attribute__ ((unused)) = { .d_automount = d_automount, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [ + AC_MSG_CHECKING([whether dops->d_automount() exists]) + ZFS_LINUX_TEST_RESULT([dentry_operations_d_automount], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_AUTOMOUNT, 1, [dops->automount() exists]) ],[ diff --git a/config/kernel-bdev-logical-size.m4 b/config/kernel-bdev-logical-size.m4 index a6194577abbd..0de9afd8888e 100644 --- a/config/kernel-bdev-logical-size.m4 +++ b/config/kernel-bdev-logical-size.m4 @@ -5,21 +5,22 @@ dnl # it has been true for a while that there was no strict 1:1 mapping dnl # between physical sector size and logical block size this change makes dnl # it explicit. dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE], [ - AC_MSG_CHECKING([whether bdev_logical_block_size() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_LOGICAL_BLOCK_SIZE], [ + ZFS_LINUX_TEST_SRC([bdev_logical_block_size], [ #include ],[ struct block_device *bdev = NULL; bdev_logical_block_size(bdev); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE], [ + AC_MSG_CHECKING([whether bdev_logical_block_size() is available]) + ZFS_LINUX_TEST_RESULT([bdev_logical_block_size], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BDEV_LOGICAL_BLOCK_SIZE, 1, - [bdev_logical_block_size() is available]) + [bdev_logical_block_size() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-bdev-physical-size.m4 b/config/kernel-bdev-physical-size.m4 index 77746ee91690..94d8172d3d76 100644 --- a/config/kernel-bdev-physical-size.m4 +++ b/config/kernel-bdev-physical-size.m4 @@ -19,21 +19,22 @@ dnl # dnl # Unfortunately, this interface isn't entirely reliable because dnl # drives are sometimes known to misreport this value. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE], [ - AC_MSG_CHECKING([whether bdev_physical_block_size() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_PHYSICAL_BLOCK_SIZE], [ + ZFS_LINUX_TEST_SRC([bdev_physical_block_size], [ #include ],[ struct block_device *bdev = NULL; bdev_physical_block_size(bdev); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE], [ + AC_MSG_CHECKING([whether bdev_physical_block_size() is available]) + ZFS_LINUX_TEST_RESULT([bdev_physical_block_size], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BDEV_PHYSICAL_BLOCK_SIZE, 1, - [bdev_physical_block_size() is available]) + [bdev_physical_block_size() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-bdi.m4 b/config/kernel-bdi.m4 index cb7479ee9c46..51516332a943 100644 --- a/config/kernel-bdi.m4 +++ b/config/kernel-bdi.m4 @@ -1,55 +1,81 @@ dnl # -dnl # 2.6.32 - 2.6.33, bdi_setup_and_register() is not exported. -dnl # 2.6.34 - 3.19, bdi_setup_and_register() takes 3 arguments. -dnl # 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. -dnl # 4.12 - x.y, super_setup_bdi_name() new interface. +dnl # Check available BDI interfaces. dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDI], [ - AC_MSG_CHECKING([whether super_setup_bdi_name() exists]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDI], [ + ZFS_LINUX_TEST_SRC([super_setup_bdi_name], [ #include struct super_block sb; ], [ char *name = "bdi"; atomic_long_t zfs_bdi_seq; int error __attribute__((unused)) = - super_setup_bdi_name(&sb, "%.28s-%ld", name, atomic_long_inc_return(&zfs_bdi_seq)); - ], [super_setup_bdi_name], [fs/super.c], [ + super_setup_bdi_name(&sb, "%.28s-%ld", name, + atomic_long_inc_return(&zfs_bdi_seq)); + ]) + + ZFS_LINUX_TEST_SRC([bdi_setup_and_register], [ + #include + struct backing_dev_info bdi; + ], [ + char *name = "bdi"; + int error __attribute__((unused)) = + bdi_setup_and_register(&bdi, name); + ]) + + ZFS_LINUX_TEST_SRC([bdi_setup_and_register_3args], [ + #include + struct backing_dev_info bdi; + ], [ + char *name = "bdi"; + unsigned int cap = BDI_CAP_MAP_COPY; + int error __attribute__((unused)) = + bdi_setup_and_register(&bdi, name, cap); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDI], [ + dnl # + dnl # 4.12, super_setup_bdi_name() introduced. + dnl # + AC_MSG_CHECKING([whether super_setup_bdi_name() exists]) + ZFS_LINUX_TEST_RESULT_SYMBOL([super_setup_bdi_name], + [super_setup_bdi_name], [fs/super.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_SETUP_BDI_NAME, 1, [super_setup_bdi_name() exists]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. + dnl # AC_MSG_CHECKING( [whether bdi_setup_and_register() wants 2 args]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name); - ], [bdi_setup_and_register], [mm/backing-dev.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL([bdi_setup_and_register], + [bdi_setup_and_register], [mm/backing-dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_BDI_SETUP_AND_REGISTER, 1, [bdi_setup_and_register() wants 2 args]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # 2.6.34 - 3.19, bdi_setup_and_register() + dnl # takes 3 arguments.
+ dnl # AC_MSG_CHECKING( [whether bdi_setup_and_register() wants 3 args]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - unsigned int cap = BDI_CAP_MAP_COPY; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name, cap); - ], [bdi_setup_and_register], [mm/backing-dev.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL( + [bdi_setup_and_register_3args], + [bdi_setup_and_register], [mm/backing-dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_3ARGS_BDI_SETUP_AND_REGISTER, 1, [bdi_setup_and_register() wants 3 args]) ], [ + dnl # + dnl # 2.6.32 - 2.6.33, bdi_setup_and_register() + dnl # is not exported. + dnl # AC_MSG_RESULT(no) ]) ]) diff --git a/config/kernel-bio-bvec-iter.m4 b/config/kernel-bio-bvec-iter.m4 index 64c989386b34..f9a99cee6b47 100644 --- a/config/kernel-bio-bvec-iter.m4 +++ b/config/kernel-bio-bvec-iter.m4 @@ -3,18 +3,21 @@ dnl # 3.14 API change, dnl # Immutable biovecs. A number of fields of struct bio are moved to dnl # struct bvec_iter. dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [ - AC_MSG_CHECKING([whether bio has bi_iter]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER], [ + ZFS_LINUX_TEST_SRC([bio_bvec_iter], [ #include ],[ struct bio bio; bio.bi_iter.bi_sector = 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [ + AC_MSG_CHECKING([whether bio has bi_iter]) + ZFS_LINUX_TEST_RESULT([bio_bvec_iter], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_BVEC_ITER, 1, [bio has bi_iter]) ],[ AC_MSG_RESULT(no) ]) ]) - diff --git a/config/kernel-bio-end-io-t-args.m4 b/config/kernel-bio-end-io-t-args.m4 index 3c420cc0c305..80a1fbedad9c 100644 --- a/config/kernel-bio-end-io-t-args.m4 +++ b/config/kernel-bio-end-io-t-args.m4 @@ -5,20 +5,21 @@ dnl # bio->bi_error. This also replaces bio->bi_flags value BIO_UPTODATE. dnl # Introduced by torvalds/linux@4246a0b63bd8f56a1469b12eafeb875b1041a451 dnl # ("block: add a bi_error field to struct bio"). dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ - AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [ + ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [ #include - void wanted_end_io(struct bio *bio) { return; } - bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io; - ],[ - ],[ + ], []) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ + AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg]) + ZFS_LINUX_TEST_RESULT([bio_end_io_t_args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_BIO_END_IO_T, 1, - [bio_end_io_t wants 1 arg]) - ],[ + [bio_end_io_t wants 1 arg]) + ], [ AC_MSG_RESULT(no) ]) ]) @@ -28,16 +29,19 @@ dnl # 4.13 API change dnl # The bio->bi_error field was replaced with bio->bi_status which is an dnl # enum which describes all possible error types. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [ - AC_MSG_CHECKING([whether bio->bi_status exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BI_STATUS], [ + ZFS_LINUX_TEST_SRC([bio_bi_status], [ #include - ],[ + ], [ struct bio bio __attribute__ ((unused)); blk_status_t status __attribute__ ((unused)) = BLK_STS_OK; - bio.bi_status = status; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [ + AC_MSG_CHECKING([whether bio->bi_status exists]) + ZFS_LINUX_TEST_RESULT([bio_bi_status], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_BI_STATUS, 1, [bio->bi_status exists]) ],[ diff --git a/config/kernel-bio-failfast.m4 b/config/kernel-bio-failfast.m4 index cfbec05238ce..0c636f08cc85 100644 --- a/config/kernel-bio-failfast.m4 +++ b/config/kernel-bio-failfast.m4 @@ -3,37 +3,54 @@ dnl # Preferred interface for setting FAILFAST on a bio: dnl # 2.6.28-2.6.35: BIO_RW_FAILFAST_{DEV|TRANSPORT|DRIVER} dnl # >= 2.6.36: REQ_FAILFAST_{DEV|TRANSPORT|DRIVER} dnl # - -AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST_DTD], [ - AC_MSG_CHECKING([whether BIO_RW_FAILFAST_* are defined]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_FAILFAST_DTD], [ + ZFS_LINUX_TEST_SRC([bio_failfast_dtd], [ #include ],[ int flags __attribute__ ((unused)); flags = ((1 << BIO_RW_FAILFAST_DEV) | (1 << BIO_RW_FAILFAST_TRANSPORT) | (1 << BIO_RW_FAILFAST_DRIVER)); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST_DTD], [ + AC_MSG_CHECKING([whether BIO_RW_FAILFAST_* are defined]) + ZFS_LINUX_TEST_RESULT([bio_failfast_dtd], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_RW_FAILFAST_DTD, 1, - [BIO_RW_FAILFAST_* are defined]) + [BIO_RW_FAILFAST_* are defined]) ],[ AC_MSG_RESULT(no) ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_REQ_FAILFAST_MASK], [ - AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_REQ_FAILFAST_MASK], [ + ZFS_LINUX_TEST_SRC([bio_failfast_mask], [ #include ],[ int flags __attribute__ ((unused)); flags = REQ_FAILFAST_MASK; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_REQ_FAILFAST_MASK], [ + AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined]) + ZFS_LINUX_TEST_RESULT([bio_failfast_mask], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REQ_FAILFAST_MASK, 1, - [REQ_FAILFAST_MASK is defined]) + [REQ_FAILFAST_MASK is defined]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_FAILFAST], [ + ZFS_AC_KERNEL_SRC_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_SRC_REQ_FAILFAST_MASK +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST], [ + ZFS_AC_KERNEL_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_REQ_FAILFAST_MASK +]) diff --git a/config/kernel-bio-op.m4 b/config/kernel-bio-op.m4 index 8299e490c2c9..1f2d23791aee 100644 --- a/config/kernel-bio-op.m4 +++ b/config/kernel-bio-op.m4 @@ -5,13 +5,43 @@ dnl # The bio_op() helper was introduced as a replacement for explicitly dnl # checking the bio->bi_rw flags. The following checks are used to dnl # detect if a specific operation is supported. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ - AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [ + ZFS_LINUX_TEST_SRC([req_op_discard], [ #include ],[ int op __attribute__ ((unused)) = REQ_OP_DISCARD; + ]) + + ZFS_LINUX_TEST_SRC([req_op_secure_erase], [ + #include ],[ + int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; + ]) + + ZFS_LINUX_TEST_SRC([req_op_flush], [ + #include + ],[ + int op __attribute__ ((unused)) = REQ_OP_FLUSH; + ]) + + ZFS_LINUX_TEST_SRC([bio_bi_opf], [ + #include + ],[ + struct bio bio __attribute__ ((unused)); + bio.bi_opf = 0; + ]) + + ZFS_LINUX_TEST_SRC([bio_set_op_attrs], [ + #include + ],[ + struct bio *bio __attribute__ ((unused)) = NULL; + bio_set_op_attrs(bio, 0, 0); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ + AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) + ZFS_LINUX_TEST_RESULT([req_op_discard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REQ_OP_DISCARD, 1, [REQ_OP_DISCARD is defined]) @@ -22,11 +52,7 @@ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [ AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; - ],[ + ZFS_LINUX_TEST_RESULT([req_op_secure_erase], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1, [REQ_OP_SECURE_ERASE is defined]) @@ -38,14 +64,9 @@ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [ AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_FLUSH; - ],[ + ZFS_LINUX_TEST_RESULT([req_op_flush], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, - [REQ_OP_FLUSH is defined]) + AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, [REQ_OP_FLUSH is defined]) ],[ AC_MSG_RESULT(no) ]) @@ -53,12 +74,7 @@ AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [ AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ AC_MSG_CHECKING([whether bio->bi_opf is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct bio bio __attribute__ ((unused)); - bio.bi_opf = 0; - ],[ + ZFS_LINUX_TEST_RESULT([bio_bi_opf], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined]) ],[ @@ -68,13 +84,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ AC_DEFUN([ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS], [ AC_MSG_CHECKING([whether bio_set_op_attrs is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct bio *bio __attribute__ ((unused)) = NULL; - - bio_set_op_attrs(bio, 0, 0); - ],[ + ZFS_LINUX_TEST_RESULT([bio_set_op_attrs], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_SET_OP_ATTRS, 1, [bio_set_op_attrs is available]) @@ -82,3 +92,11 @@ AC_DEFUN([ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS], [ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_OPS], [ + ZFS_AC_KERNEL_REQ_OP_DISCARD + ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE + ZFS_AC_KERNEL_REQ_OP_FLUSH + ZFS_AC_KERNEL_BIO_BI_OPF + ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS +]) diff --git a/config/kernel-bio-rw-barrier.m4 b/config/kernel-bio-rw-barrier.m4 index bcf0f7ea00b0..f667d48844ff 100644 --- a/config/kernel-bio-rw-barrier.m4 +++ b/config/kernel-bio-rw-barrier.m4 @@ -3,20 +3,25 @@ dnl # Interface for issuing a discard bio: dnl # 2.6.28-2.6.35: BIO_RW_BARRIER dnl # 2.6.36-3.x: REQ_BARRIER dnl # - +dnl # dnl # Since REQ_BARRIER is a preprocessor definition, there is no need for an dnl # autotools check for it. 
Also, REQ_BARRIER existed in the request layer dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the dnl # request layer and bio layer flags, so it would be wrong to assume that dnl # the APIs are mutually exclusive contrary to the typical case. -AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [ - AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined]) - ZFS_LINUX_TRY_COMPILE([ +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_BARRIER], [ + ZFS_LINUX_TEST_SRC([bio_rw_barrier], [ #include ],[ int flags __attribute__ ((unused)); flags = BIO_RW_BARRIER; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [ + AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined]) + ZFS_LINUX_TEST_RESULT([bio_rw_barrier], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_RW_BARRIER, 1, [BIO_RW_BARRIER is defined]) ],[ diff --git a/config/kernel-bio-rw-discard.m4 b/config/kernel-bio-rw-discard.m4 index 0554b9a9dae0..34a89279c20f 100644 --- a/config/kernel-bio-rw-discard.m4 +++ b/config/kernel-bio-rw-discard.m4 @@ -3,20 +3,25 @@ dnl # Interface for issuing a discard bio: dnl # 2.6.28-2.6.35: BIO_RW_DISCARD dnl # 2.6.36-3.x: REQ_DISCARD dnl # - +dnl # dnl # Since REQ_DISCARD is a preprocessor definition, there is no need for an dnl # autotools check for it. Also, REQ_DISCARD existed in the request layer dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the dnl # request layer and bio layer flags, so it would be wrong to assume that dnl # the APIs are mutually exclusive contrary to the typical case. -AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [ - AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined]) - ZFS_LINUX_TRY_COMPILE([ +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_DISCARD], [ + ZFS_LINUX_TEST_SRC([bio_rw_discard], [ #include ],[ int flags __attribute__ ((unused)); flags = BIO_RW_DISCARD; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [ + AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined]) + ZFS_LINUX_TEST_RESULT([bio_rw_discard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BIO_RW_DISCARD, 1, [BIO_RW_DISCARD is defined]) ],[ diff --git a/config/kernel-bio_set_dev.m4 b/config/kernel-bio_set_dev.m4 index 71d47a89309f..b8e13f35ac09 100644 --- a/config/kernel-bio_set_dev.m4 +++ b/config/kernel-bio_set_dev.m4 @@ -3,51 +3,38 @@ dnl # Linux 4.14 API, dnl # dnl # The bio_set_dev() helper macro was introduced as part of the transition dnl # to have struct gendisk in struct bio. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV_MACRO], [ - AC_MSG_CHECKING([whether bio_set_dev() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - struct block_device *bdev = NULL; - struct bio *bio = NULL; - bio_set_dev(bio, bdev); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # Linux 5.0 API, dnl # dnl # The bio_set_dev() helper macro was updated to internally depend on dnl # bio_associate_blkg() symbol which is exported GPL-only. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV_GPL_ONLY], [ - AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV], [ + ZFS_LINUX_TEST_SRC([bio_set_dev], [ #include #include - MODULE_LICENSE("$ZFS_META_LICENSE"); ],[ struct block_device *bdev = NULL; struct bio *bio = NULL; bio_set_dev(bio, bdev); - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, - [bio_set_dev() GPL-only]) - ]) + ], [], [$ZFS_META_LICENSE]) ]) AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [ - ZFS_AC_KERNEL_BIO_SET_DEV_MACRO - ZFS_AC_KERNEL_BIO_SET_DEV_GPL_ONLY + AC_MSG_CHECKING([whether bio_set_dev() is available]) + ZFS_LINUX_TEST_RESULT([bio_set_dev], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() is available]) + + AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) + ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [ + AC_MSG_RESULT(no) + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, + [bio_set_dev() GPL-only]) + ]) + ],[ + AC_MSG_RESULT(no) + ]) ]) diff --git a/config/kernel-blk-queue-bdi.m4 b/config/kernel-blk-queue-bdi.m4 index 816471166a5a..28241c4944c1 100644 --- a/config/kernel-blk-queue-bdi.m4 +++ b/config/kernel-blk-queue-bdi.m4 @@ -2,15 +2,19 @@ dnl # dnl # 2.6.32 - 4.11, statically allocated bdi in request_queue dnl # 4.12 - x.y, dynamically allocated bdi in request_queue dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [ - AC_MSG_CHECKING([whether blk_queue bdi is dynamic]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI], [ + ZFS_LINUX_TEST_SRC([blk_queue_bdi], [ #include ],[ struct request_queue q; struct backing_dev_info bdi; q.backing_dev_info = &bdi; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [ + AC_MSG_CHECKING([whether blk_queue bdi is dynamic]) + ZFS_LINUX_TEST_RESULT([blk_queue_bdi], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1, [blk queue backing_dev_info is dynamic]) diff --git a/config/kernel-blk-queue-discard.m4 b/config/kernel-blk-queue-discard.m4 index addbba81447f..85a29356def0 100644 --- a/config/kernel-blk-queue-discard.m4 +++ b/config/kernel-blk-queue-discard.m4 @@ -2,16 +2,19 @@ dnl # dnl # 2.6.32 - 4.x API, dnl # blk_queue_discard() dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ - AC_MSG_CHECKING([whether blk_queue_discard() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [ + ZFS_LINUX_TEST_SRC([blk_queue_discard], [ #include ],[ struct request_queue *q __attribute__ ((unused)) = NULL; int value __attribute__ ((unused)); - value = blk_queue_discard(q); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ + AC_MSG_CHECKING([whether blk_queue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_discard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1, [blk_queue_discard() is available]) @@ -30,16 +33,27 @@ dnl # dnl # 2.6.x - 2.6.35 API, dnl # Unsupported by kernel dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ - AC_MSG_CHECKING([whether blk_queue_secure_erase() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ + ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [ #include ],[ struct request_queue *q __attribute__ ((unused)) = NULL; int value __attribute__ ((unused)); - value = blk_queue_secure_erase(q); + ]) + + ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [ + #include ],[ + struct request_queue *q __attribute__ ((unused)) = NULL; + int value __attribute__ 
((unused)); + value = blk_queue_secdiscard(q); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ + AC_MSG_CHECKING([whether blk_queue_secure_erase() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1, [blk_queue_secure_erase() is available]) @@ -47,14 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether blk_queue_secdiscard() is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct request_queue *q __attribute__ ((unused)) = NULL; - int value __attribute__ ((unused)); - - value = blk_queue_secdiscard(q); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1, [blk_queue_secdiscard() is available]) diff --git a/config/kernel-blk-queue-flags.m4 b/config/kernel-blk-queue-flags.m4 index b570245c74d4..9d4dfc159e8e 100644 --- a/config/kernel-blk-queue-flags.m4 +++ b/config/kernel-blk-queue-flags.m4 @@ -3,36 +3,54 @@ dnl # API change dnl # https://github.com/torvalds/linux/commit/8814ce8 dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear dnl # - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [ - AC_MSG_CHECKING([whether blk_queue_flag_set() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [ + ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [ #include #include ],[ struct request_queue *q = NULL; blk_queue_flag_set(0, q); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [ + AC_MSG_CHECKING([whether blk_queue_flag_set() exists]) + ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1, [blk_queue_flag_set() exists]) + AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1, + [blk_queue_flag_set() exists]) ],[ AC_MSG_RESULT(no) ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [ - AC_MSG_CHECKING([whether blk_queue_flag_clear() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [ + ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [ #include #include ],[ struct request_queue *q = NULL; blk_queue_flag_clear(0, q); - ],[ + ]) +]) +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [ + AC_MSG_CHECKING([whether blk_queue_flag_clear() exists]) + ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1, [blk_queue_flag_clear() exists]) + AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1, + [blk_queue_flag_clear() exists]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAGS], [ + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAGS], [ + ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET + ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR +]) diff --git a/config/kernel-blk-queue-flush.m4 b/config/kernel-blk-queue-flush.m4 index 1baab83a4e8c..b546d940018b 100644 --- a/config/kernel-blk-queue-flush.m4 +++ b/config/kernel-blk-queue-flush.m4 @@ -9,35 +9,37 @@ dnl # there we implement our own compatibility function, otherwise dnl # we use the function. The hope is that long term this function dnl # will be opened up. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ - AC_MSG_CHECKING([whether blk_queue_flush() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [ + ZFS_LINUX_TEST_SRC([blk_queue_flush], [ #include - ],[ + ], [ struct request_queue *q = NULL; (void) blk_queue_flush(q, REQ_FLUSH); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE]) + + ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [ + #include + #include + ], [ + struct request_queue *q = NULL; + blk_queue_write_cache(q, true, true); + ], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ + AC_MSG_CHECKING([whether blk_queue_flush() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_flush], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1, - [blk_queue_flush() is available]) + [blk_queue_flush() is available]) AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct request_queue *q = NULL; - (void) blk_queue_flush(q, REQ_FLUSH); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1, - [blk_queue_flush() is GPL-only]) + [blk_queue_flush() is GPL-only]) ]) ],[ AC_MSG_RESULT(no) @@ -48,38 +50,20 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ dnl # Replace blk_queue_flush with blk_queue_write_cache dnl # AC_MSG_CHECKING([whether blk_queue_write_cache() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - - ],[ - struct request_queue *q = NULL; - blk_queue_write_cache(q, true, true); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1, - [blk_queue_write_cache() exists]) + [blk_queue_write_cache() exists]) AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct request_queue *q = NULL; - blk_queue_write_cache(q, true, true); - ],[ + ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1, - [blk_queue_write_cache() is GPL-only]) + [blk_queue_write_cache() is GPL-only]) ]) ],[ AC_MSG_RESULT(no) ]) - - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-blk-queue-max-hw-sectors.m4 b/config/kernel-blk-queue-max-hw-sectors.m4 index 2f5515dc6b7b..7387f84de749 100644 --- a/config/kernel-blk-queue-max-hw-sectors.m4 +++ b/config/kernel-blk-queue-max-hw-sectors.m4 @@ -2,21 +2,22 @@ dnl # dnl # 2.6.34 API change dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors(). 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [ - AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS], [ + ZFS_LINUX_TEST_SRC([blk_queue_max_hw_sectors], [ #include - ],[ + ], [ struct request_queue *q = NULL; (void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [ + AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_max_hw_sectors], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_MAX_HW_SECTORS, 1, - [blk_queue_max_hw_sectors() is available]) + [blk_queue_max_hw_sectors() is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-blk-queue-max-segments.m4 b/config/kernel-blk-queue-max-segments.m4 index b2a40423a5c8..1e4092df9aca 100644 --- a/config/kernel-blk-queue-max-segments.m4 +++ b/config/kernel-blk-queue-max-segments.m4 @@ -3,21 +3,22 @@ dnl # 2.6.34 API change dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments() dnl # and blk_queue_max_phys_segments(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ - AC_MSG_CHECKING([whether blk_queue_max_segments() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS], [ + ZFS_LINUX_TEST_SRC([blk_queue_max_segments], [ #include - ],[ + ], [ struct request_queue *q = NULL; (void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS); - ],[ + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ + AC_MSG_CHECKING([whether blk_queue_max_segments() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_max_segments], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1, - [blk_queue_max_segments() is available]) - ],[ + [blk_queue_max_segments() is available]) + ], [ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-blk-queue-unplug.m4 b/config/kernel-blk-queue-unplug.m4 index 075fbccd1a53..f5d1814b83a8 100644 --- a/config/kernel-blk-queue-unplug.m4 +++ b/config/kernel-blk-queue-unplug.m4 @@ -2,43 +2,53 @@ dnl # dnl # 2.6.32-2.6.35 API - The BIO_RW_UNPLUG enum can be used as a hint dnl # to unplug the queue. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG], [ - AC_MSG_CHECKING([whether the BIO_RW_UNPLUG enum is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_UNPLUG], [ + ZFS_LINUX_TEST_SRC([blk_queue_bio_rw_unplug], [ #include ],[ - extern enum bio_rw_flags rw; + enum bio_rw_flags rw __attribute__ ((unused)) = BIO_RW_UNPLUG; + ]) +]) - rw = BIO_RW_UNPLUG; - ],[ +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_UNPLUG], [ + AC_MSG_CHECKING([whether the BIO_RW_UNPLUG enum is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_bio_rw_unplug], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG, 1, - [BIO_RW_UNPLUG is available]) + [BIO_RW_UNPLUG is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG], [ - AC_MSG_CHECKING([whether struct blk_plug is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_PLUG], [ + ZFS_LINUX_TEST_SRC([blk_plug], [ #include ],[ - struct blk_plug plug; + struct blk_plug plug __attribute__ ((unused)); blk_start_plug(&plug); blk_finish_plug(&plug); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_PLUG], [ + AC_MSG_CHECKING([whether struct blk_plug is available]) + ZFS_LINUX_TEST_RESULT([blk_plug], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BLK_PLUG, 1, - [struct blk_plug is available]) + [struct blk_plug is available]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG], [ + ZFS_AC_KERNEL_SRC_BIO_RW_UNPLUG + ZFS_AC_KERNEL_SRC_BLK_PLUG +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PLUG], [ + ZFS_AC_KERNEL_BIO_RW_UNPLUG + ZFS_AC_KERNEL_BLK_PLUG ]) diff --git a/config/kernel-blkdev-get-by-path.m4 b/config/kernel-blkdev-get-by-path.m4 index 40ecc06b6c92..fb0cea6af59c 100644 --- a/config/kernel-blkdev-get-by-path.m4 +++ b/config/kernel-blkdev-get-by-path.m4 @@ -3,16 +3,21 @@ dnl # 2.6.38 API change dnl # open_bdev_exclusive() changed to blkdev_get_by_path() dnl # close_bdev_exclusive() changed to blkdev_put() dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], - [AC_MSG_CHECKING([whether blkdev_get_by_path() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [ + ZFS_LINUX_TEST_SRC([blkdev_get_by_path], [ #include ], [ blkdev_get_by_path(NULL, 0, NULL); - ], [blkdev_get_by_path], [fs/block_dev.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [ + AC_MSG_CHECKING([whether blkdev_get_by_path() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([blkdev_get_by_path], + [blkdev_get_by_path], [fs/block_dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH, 1, - [blkdev_get_by_path() is available]) + [blkdev_get_by_path() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-blkdev-reread-part.m4 b/config/kernel-blkdev-reread-part.m4 index 5664769a3091..1bf1e7c3a24d 100644 --- a/config/kernel-blkdev-reread-part.m4 +++ b/config/kernel-blkdev-reread-part.m4 @@ -2,16 +2,20 @@ dnl # dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the dnl # 3.10.0 CentOS 7.x enterprise kernels. 
diff --git a/config/kernel-blkdev-reread-part.m4 b/config/kernel-blkdev-reread-part.m4
index 5664769a3091..1bf1e7c3a24d 100644
--- a/config/kernel-blkdev-reread-part.m4
+++ b/config/kernel-blkdev-reread-part.m4
@@ -2,16 +2,20 @@ dnl #
 dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the
 dnl # 3.10.0 CentOS 7.x enterprise kernels.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [
-	AC_MSG_CHECKING([whether blkdev_reread_part() is available])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART], [
+	ZFS_LINUX_TEST_SRC([blkdev_reread_part], [
 		#include <linux/fs.h>
 	], [
 		struct block_device *bdev = NULL;
 		int error;

 		error = blkdev_reread_part(bdev);
-	], [
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [
+	AC_MSG_CHECKING([whether blkdev_reread_part() is available])
+	ZFS_LINUX_TEST_RESULT([blkdev_reread_part], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLKDEV_REREAD_PART, 1,
 		    [blkdev_reread_part() is available])
diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4
index 5f2811c15348..c3d5eec52946 100644
--- a/config/kernel-block-device-operations.m4
+++ b/config/kernel-block-device-operations.m4
@@ -1,11 +1,8 @@
 dnl #
 dnl # 2.6.38 API change
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
-	AC_MSG_CHECKING([whether bops->check_events() exists])
-	tmp_flags="$EXTRA_KCFLAGS"
-	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
+	ZFS_LINUX_TEST_SRC([block_device_operations_check_events], [
 		#include <linux/blkdev.h>

 		unsigned int blk_check_events(struct gendisk *disk,
@@ -15,25 +12,25 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
 		    bops __attribute__ ((unused)) = {
 			.check_events = blk_check_events,
 		};
-	],[
-	],[
+	], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
+	AC_MSG_CHECKING([whether bops->check_events() exists])
+	ZFS_LINUX_TEST_RESULT([block_device_operations_check_events], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1,
 		    [bops->check_events() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
-	EXTRA_KCFLAGS="$tmp_flags"
 ])

 dnl #
 dnl # 3.10.x API change
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
-	AC_MSG_CHECKING([whether bops->release() is void])
-	tmp_flags="$EXTRA_KCFLAGS"
-	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
+	ZFS_LINUX_TEST_SRC([block_device_operations_release_void], [
 		#include <linux/blkdev.h>

 		void blk_release(struct gendisk *g, fmode_t mode) { return; }
@@ -45,13 +42,26 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
 			.ioctl		= NULL,
 			.compat_ioctl	= NULL,
 		};
-	],[
-	],[
-		AC_MSG_RESULT(void)
+	], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
+	AC_MSG_CHECKING([whether bops->release() is void])
+	ZFS_LINUX_TEST_RESULT([block_device_operations_release_void], [
+		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
 		    [bops->release() returns void])
 	],[
-		AC_MSG_RESULT(int)
+		AC_MSG_RESULT(no)
 	])
-	EXTRA_KCFLAGS="$tmp_flags"
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [
+	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
+	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS], [
+	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
+	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
 ])
diff --git a/config/kernel-clear-inode.m4 b/config/kernel-clear-inode.m4
index 8d880fcd8f50..3f454d7ec0d3 100644
--- a/config/kernel-clear-inode.m4
+++ b/config/kernel-clear-inode.m4
@@ -19,13 +19,18 @@ dnl # Therefore, to ensure we have the correct API we only allow the
 dnl # clear_inode() compatibility code to be defined iff the evict_inode()
 dnl # functionality is also detected.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CLEAR_INODE],
-	[AC_MSG_CHECKING([whether clear_inode() is available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CLEAR_INODE], [
+	ZFS_LINUX_TEST_SRC([clear_inode], [
 		#include <linux/fs.h>
 	], [
 		clear_inode(NULL);
-	], [clear_inode], [fs/inode.c], [
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CLEAR_INODE], [
+	AC_MSG_CHECKING([whether clear_inode() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([clear_inode],
+	    [clear_inode], [fs/inode.c], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CLEAR_INODE, 1, [clear_inode() is available])
 	], [
diff --git a/config/kernel-commit-metadata.m4 b/config/kernel-commit-metadata.m4
index b66a16fd212d..9bc3b6622bb7 100644
--- a/config/kernel-commit-metadata.m4
+++ b/config/kernel-commit-metadata.m4
@@ -4,19 +4,22 @@ dnl # Added eops->commit_metadata() callback to allow the underlying
 dnl # filesystem to determine the most efficient way to commit the inode.
 dnl # Prior to this the nfs server would issue an explicit fsync().
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_COMMIT_METADATA], [
-	AC_MSG_CHECKING([whether eops->commit_metadata() exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_COMMIT_METADATA], [
+	ZFS_LINUX_TEST_SRC([export_operations_commit_metadata], [
 		#include <linux/exportfs.h>

 		int commit_metadata(struct inode *inode) { return 0; }
 		static struct export_operations eops __attribute__ ((unused))={
 			.commit_metadata = commit_metadata,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_COMMIT_METADATA], [
+	AC_MSG_CHECKING([whether eops->commit_metadata() exists])
+	ZFS_LINUX_TEST_RESULT([export_operations_commit_metadata], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_COMMIT_METADATA, 1,
-			[eops->commit_metadata() exists])
+		    [eops->commit_metadata() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
diff --git a/config/kernel-config-defined.m4 b/config/kernel-config-defined.m4
new file mode 100644
index 000000000000..0ee4231cc2db
--- /dev/null
+++ b/config/kernel-config-defined.m4
@@ -0,0 +1,183 @@
+dnl #
+dnl # Certain kernel build options are not supported.  These must be
+dnl # detected at configure time and cause a build failure.  Otherwise
+dnl # modules may be successfully built that behave incorrectly.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEFINED], [
+	AS_IF([test "x$cross_compiling" != xyes], [
+		AC_RUN_IFELSE([
+			AC_LANG_PROGRAM([
+				#include "$LINUX/include/linux/license.h"
+			], [
+				return !license_is_gpl_compatible(
+				    "$ZFS_META_LICENSE");
+			])
+		], [
+			AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1],
+			    [Define to 1 if GPL-only symbols can be used])
+		], [
+		])
+	])
+
+	ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE
+	ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC
+	ZFS_AC_KERNEL_SRC_CONFIG_TRIM_UNUSED_KSYMS
+	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE
+	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_DEFLATE
+
+	AC_MSG_CHECKING([for kernel config option compatibility])
+	ZFS_LINUX_TEST_COMPILE_ALL([config])
+	AC_MSG_RESULT([done])
+
+	ZFS_AC_KERNEL_CONFIG_THREAD_SIZE
+	ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
+	ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS
+	ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE
+	ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE
+])
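This new file also shows how the framework batches work: every registered conftest for the [config] group is compiled in one ZFS_LINUX_TEST_COMPILE_ALL pass, then the result macros read back the outcomes. A hedged sketch of what adding one more option check would look like (CONFIG_FOO and its messages are invented for illustration):

dnl # Sketch, not part of the patch: a new option check registers its
dnl # source, is added to both lists in ZFS_AC_KERNEL_CONFIG_DEFINED
dnl # above, and then reads back the batched compile result.
AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_FOO], [
	ZFS_LINUX_TEST_SRC([config_foo], [
		#if !defined(CONFIG_FOO)
		#error CONFIG_FOO not defined
		#endif
	],[])
])

AC_DEFUN([ZFS_AC_KERNEL_CONFIG_FOO], [
	AC_MSG_CHECKING([whether CONFIG_FOO is defined])
	ZFS_LINUX_TEST_RESULT([config_foo], [
		AC_MSG_RESULT([yes])
	],[
		AC_MSG_RESULT([no])
		AC_MSG_ERROR([
	*** This kernel does not include the required foo support.
	*** Rebuild the kernel with CONFIG_FOO=y set.])
	])
])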
+
+dnl #
+dnl # Check configured THREAD_SIZE
+dnl #
+dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64
+dnl # the default thread stack size was increased to 16K from 8K.  Therefore,
+dnl # on newer kernels and some architectures stack usage optimizations can be
+dnl # conditionally applied to improve performance without negatively impacting
+dnl # stability.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE], [
+	ZFS_LINUX_TEST_SRC([config_thread_size], [
+		#include <linux/module.h>
+	],[
+		#if (THREAD_SIZE < 16384)
+		#error "THREAD_SIZE is less than 16K"
+		#endif
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [
+	AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks])
+	ZFS_LINUX_TEST_RESULT([config_thread_size], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Check CONFIG_DEBUG_LOCK_ALLOC
+dnl #
+dnl # This is typically only set for debug kernels because it comes with
+dnl # a performance penalty.  However, when it is set it maps the non-GPL
+dnl # symbol mutex_lock() to the GPL-only mutex_lock_nested() symbol.
+dnl # This will cause a failure at link time which we'd rather know about
+dnl # at compile time.
+dnl #
+dnl # Since we plan to pursue making mutex_lock_nested() a non-GPL symbol
+dnl # with the upstream community we add a check to detect this case.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC], [
+	ZFS_LINUX_TEST_SRC([config_debug_lock_alloc], [
+		#include <linux/mutex.h>
+	],[
+		struct mutex lock;
+
+		mutex_init(&lock);
+		mutex_lock(&lock);
+		mutex_unlock(&lock);
+	], [], [$ZFS_META_LICENSE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
+	AC_MSG_CHECKING([whether mutex_lock() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([config_debug_lock_alloc], [
+		AC_MSG_RESULT(no)
+	],[
+		AC_MSG_RESULT(yes)
+		AC_MSG_ERROR([
+	*** Kernel built with CONFIG_DEBUG_LOCK_ALLOC which is incompatible
+	*** with the CDDL license and will prevent the module linking stage
+	*** from succeeding.  You must rebuild your kernel without this
+	*** option enabled.])
+	])
+])
+
+dnl #
+dnl # Check CONFIG_TRIM_UNUSED_KSYMS
+dnl #
+dnl # Verify the kernel has CONFIG_TRIM_UNUSED_KSYMS disabled.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_TRIM_UNUSED_KSYMS], [
+	ZFS_LINUX_TEST_SRC([config_trim_unusued_ksyms], [
+		#if defined(CONFIG_TRIM_UNUSED_KSYMS)
+		#error CONFIG_TRIM_UNUSED_KSYMS not defined
+		#endif
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS], [
+	AC_MSG_CHECKING([whether CONFIG_TRIM_UNUSED_KSYM is disabled])
+	ZFS_LINUX_TEST_RESULT([config_trim_unusued_ksyms], [
+		AC_MSG_RESULT([yes])
+	],[
+		AC_MSG_RESULT([no])
+		AS_IF([test "x$enable_linux_builtin" != xyes], [
+			AC_MSG_ERROR([
+	*** This kernel has unused symbols trimming enabled, please disable.
+	*** Rebuild the kernel with CONFIG_TRIM_UNUSED_KSYMS=n set.])
+		])
+	])
+])
+
+dnl #
+dnl # Check CONFIG_ZLIB_INFLATE
+dnl #
+dnl # Verify the kernel has CONFIG_ZLIB_INFLATE support enabled.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE], [
+	ZFS_LINUX_TEST_SRC([config_zlib_inflate], [
+		#if !defined(CONFIG_ZLIB_INFLATE) && \
+		    !defined(CONFIG_ZLIB_INFLATE_MODULE)
+		#error CONFIG_ZLIB_INFLATE not defined
+		#endif
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE], [
+	AC_MSG_CHECKING([whether CONFIG_ZLIB_INFLATE is defined])
+	ZFS_LINUX_TEST_RESULT([config_zlib_inflate], [
+		AC_MSG_RESULT([yes])
+	],[
+		AC_MSG_RESULT([no])
+		AC_MSG_ERROR([
+	*** This kernel does not include the required zlib inflate support.
+	*** Rebuild the kernel with CONFIG_ZLIB_INFLATE=y|m set.])
+	])
+])
+
+dnl #
+dnl # Check CONFIG_ZLIB_DEFLATE
+dnl #
+dnl # Verify the kernel has CONFIG_ZLIB_DEFLATE support enabled.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_DEFLATE], [
+	ZFS_LINUX_TEST_SRC([config_zlib_deflate], [
+		#if !defined(CONFIG_ZLIB_DEFLATE) && \
+		    !defined(CONFIG_ZLIB_DEFLATE_MODULE)
+		#error CONFIG_ZLIB_DEFLATE not defined
+		#endif
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE], [
+	AC_MSG_CHECKING([whether CONFIG_ZLIB_DEFLATE is defined])
+	ZFS_LINUX_TEST_RESULT([config_zlib_deflate], [
+		AC_MSG_RESULT([yes])
+	],[
+		AC_MSG_RESULT([no])
+		AC_MSG_ERROR([
+	*** This kernel does not include the required zlib deflate support.
+	*** Rebuild the kernel with CONFIG_ZLIB_DEFLATE=y|m set.])
+	])
+])
diff --git a/config/kernel-create-nameidata.m4 b/config/kernel-create-nameidata.m4
index d4c155c57fc9..c43ca5b85691 100644
--- a/config/kernel-create-nameidata.m4
+++ b/config/kernel-create-nameidata.m4
@@ -1,9 +1,8 @@
 dnl #
 dnl # 3.6 API change
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [
-	AC_MSG_CHECKING([whether iops->create() passes nameidata])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_NAMEIDATA], [
+	ZFS_LINUX_TEST_SRC([create_nameidata], [
 		#include <linux/fs.h>
 		#include <linux/sched.h>
@@ -19,11 +18,15 @@ AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [
 		    iops __attribute__ ((unused)) = {
 			.create = inode_create,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [
+	AC_MSG_CHECKING([whether iops->create() passes nameidata])
+	ZFS_LINUX_TEST_RESULT([create_nameidata], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CREATE_NAMEIDATA, 1,
-			[iops->create() passes nameidata])
+		    [iops->create() passes nameidata])
 	],[
 		AC_MSG_RESULT(no)
 	])
diff --git a/config/kernel-ctl-table-name.m4 b/config/kernel-ctl-table-name.m4
index 3ce499968ffd..16f2ad544511 100644
--- a/config/kernel-ctl-table-name.m4
+++ b/config/kernel-ctl-table-name.m4
@@ -2,14 +2,18 @@ dnl #
 dnl # 2.6.33 API change,
 dnl # Removed .ctl_name from struct ctl_table.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CTL_NAME], [
-	AC_MSG_CHECKING([whether struct ctl_table has ctl_name])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CTL_NAME], [
+	ZFS_LINUX_TEST_SRC([ctl_name], [
 		#include <linux/sysctl.h>
 	],[
 		struct ctl_table ctl __attribute__ ((unused));
 		ctl.ctl_name = 0;
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CTL_NAME], [
+	AC_MSG_CHECKING([whether struct ctl_table has ctl_name])
+	ZFS_LINUX_TEST_RESULT([ctl_name], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CTL_NAME, 1, [struct ctl_table has ctl_name])
 	],[
diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4
index c7d5c9b52006..3ceb5f63efa9 100644
--- a/config/kernel-current-time.m4
+++ b/config/kernel-current-time.m4
@@ -2,14 +2,19 @@ dnl #
 dnl # 4.9, current_time() added
 dnl # 4.18, return type changed from timespec to timespec64
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME],
-	[AC_MSG_CHECKING([whether current_time() exists])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [
+	ZFS_LINUX_TEST_SRC([current_time], [
 		#include <linux/fs.h>
 	], [
 		struct inode ip __attribute__ ((unused));
 		ip.i_atime = current_time(&ip);
-	], [current_time], [fs/inode.c], [
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], [
+	AC_MSG_CHECKING([whether current_time() exists])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([current_time],
+	    [current_time], [fs/inode.c], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists])
 	], [
diff --git a/config/kernel-current_bio_tail.m4 b/config/kernel-current_bio_tail.m4
index b72f21e8a356..9dfc3e6e0da3 100644
--- a/config/kernel-current_bio_tail.m4
+++ b/config/kernel-current_bio_tail.m4
@@ -4,30 +4,36 @@ dnl # current->bio_tail and current->bio_list were struct bio pointers prior to
 dnl # Linux 2.6.34. They were refactored into a struct bio_list pointer called
 dnl # current->bio_list in Linux 2.6.34.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [
-	AC_MSG_CHECKING([whether current->bio_tail exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL], [
+	ZFS_LINUX_TEST_SRC([current_bio_tail], [
 		#include <linux/sched.h>
-	],[
+	], [
 		current->bio_tail = (struct bio **) NULL;
-	],[
+	])
+
+	ZFS_LINUX_TEST_SRC([current_bio_list], [
+		#include <linux/sched.h>
+	], [
+		current->bio_list = (struct bio_list *) NULL;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [
+	AC_MSG_CHECKING([whether current->bio_tail exists])
+	ZFS_LINUX_TEST_RESULT([current_bio_tail], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CURRENT_BIO_TAIL, 1,
 		    [current->bio_tail exists])
 	],[
 		AC_MSG_RESULT(no)
+
 		AC_MSG_CHECKING([whether current->bio_list exists])
-		ZFS_LINUX_TRY_COMPILE([
-			#include <linux/sched.h>
-		],[
-			current->bio_list = (struct bio_list *) NULL;
-		],[
+		ZFS_LINUX_TEST_RESULT([current_bio_list], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_CURRENT_BIO_LIST, 1,
 			    [current->bio_list exists])
 		],[
-			AC_MSG_ERROR(no - Please file a bug report at
-			    https://github.com/zfsonlinux/zfs/issues/new)
+			ZFS_LINUX_TEST_ERROR([bio_list])
 		])
 	])
 ])
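Where the old code open-coded a fatal AC_MSG_ERROR with a file-a-bug URL, the conversion above funnels hard failures through ZFS_LINUX_TEST_ERROR. A hedged sketch of the required-interface idiom (the test names are hypothetical, and the helper is assumed to print a standard bug-report message and abort configure, as its use here suggests):

dnl # Sketch, not part of the patch: probe mutually exclusive interface
dnl # variants newest-first and treat "none matched" as a configure-time
dnl # error rather than a silently missing feature.  Both variants are
dnl # assumed to have been registered earlier with ZFS_LINUX_TEST_SRC.
ZFS_LINUX_TEST_RESULT([new_variant], [
	AC_MSG_RESULT(yes)
	AC_DEFINE(HAVE_NEW_VARIANT, 1, [new variant exists])
],[
	AC_MSG_RESULT(no)

	AC_MSG_CHECKING([whether the old variant exists])
	ZFS_LINUX_TEST_RESULT([old_variant], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_OLD_VARIANT, 1, [old variant exists])
	],[
		ZFS_LINUX_TEST_ERROR([variant])
	])
])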
diff --git a/config/kernel-d-make-root.m4 b/config/kernel-d-make-root.m4
deleted file mode 100644
index 9c2b73dcbf59..000000000000
--- a/config/kernel-d-make-root.m4
+++ /dev/null
@@ -1,17 +0,0 @@
-dnl #
-dnl # 3.4.0 API change
-dnl # Added d_make_root() to replace previous d_alloc_root() function.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_D_MAKE_ROOT],
-	[AC_MSG_CHECKING([whether d_make_root() is available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
-		#include <linux/dcache.h>
-	], [
-		d_make_root(NULL);
-	], [d_make_root], [fs/dcache.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_D_MAKE_ROOT, 1, [d_make_root() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])
diff --git a/config/kernel-d-obtain-alias.m4 b/config/kernel-d-obtain-alias.m4
deleted file mode 100644
index 2b4b11eccc1b..000000000000
--- a/config/kernel-d-obtain-alias.m4
+++ /dev/null
@@ -1,18 +0,0 @@
-dnl #
-dnl # 2.6.28 API change
-dnl # Added d_obtain_alias() helper function.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_D_OBTAIN_ALIAS],
-	[AC_MSG_CHECKING([whether d_obtain_alias() is available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
-		#include <linux/dcache.h>
-	], [
-		d_obtain_alias(NULL);
-	], [d_obtain_alias], [fs/dcache.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1,
-			[d_obtain_alias() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])
diff --git a/config/kernel-d-prune-aliases.m4 b/config/kernel-d-prune-aliases.m4
deleted file mode 100644
index d9c521b1d4cc..000000000000
--- a/config/kernel-d-prune-aliases.m4
+++ /dev/null
@@ -1,19 +0,0 @@
-dnl #
-dnl # 2.6.12 API change
-dnl # d_prune_aliases() helper function available.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES],
-	[AC_MSG_CHECKING([whether d_prune_aliases() is available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
-		#include <linux/dcache.h>
-	], [
-		struct inode *ip = NULL;
-		d_prune_aliases(ip);
-	], [d_prune_aliases], [fs/dcache.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
-			[d_prune_aliases() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])
diff --git a/config/kernel-declare-event-class.m4 b/config/kernel-declare-event-class.m4
index 7867d751749d..6c78ee858d7d 100644
--- a/config/kernel-declare-event-class.m4
+++ b/config/kernel-declare-event-class.m4
@@ -2,13 +2,10 @@ dnl #
 dnl # Ensure the DECLARE_EVENT_CLASS macro is available to non-GPL modules.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [
-	tmp_flags="$EXTRA_KCFLAGS"
-	EXTRA_KCFLAGS="-I\$(src)"
-
 	AC_MSG_CHECKING([whether DECLARE_EVENT_CLASS() is available])
 	ZFS_LINUX_TRY_COMPILE_HEADER([
 		#include <linux/module.h>
-		MODULE_LICENSE(ZFS_META_LICENSE);
+		MODULE_LICENSE("$ZFS_META_LICENSE");

 		#define CREATE_TRACE_POINTS
 		#include "conftest.h"
@@ -18,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_DECLARE_EVENT_CLASS, 1,
-			[DECLARE_EVENT_CLASS() is available])
+		    [DECLARE_EVENT_CLASS() is available])
 	],[
 		AC_MSG_RESULT(no)
 	],[
@@ -55,5 +52,4 @@ AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [
 	#define TRACE_INCLUDE_FILE conftest
 	#include <trace/define_trace.h>
 ])
-	EXTRA_KCFLAGS="$tmp_flags"
 ])
diff --git a/config/kernel-dentry-operations.m4 b/config/kernel-dentry-operations.m4
index 61f5a27af5a7..2dfd2ac554cf 100644
--- a/config/kernel-dentry-operations.m4
+++ b/config/kernel-dentry-operations.m4
@@ -1,9 +1,103 @@
+dnl #
+dnl # 3.4.0 API change
+dnl # Added d_make_root() to replace previous d_alloc_root() function.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_D_MAKE_ROOT], [
+	ZFS_LINUX_TEST_SRC([d_make_root], [
+		#include <linux/dcache.h>
+	], [
+		d_make_root(NULL);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_D_MAKE_ROOT], [
+	AC_MSG_CHECKING([whether d_make_root() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([d_make_root],
+	    [d_make_root], [fs/dcache.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_D_MAKE_ROOT, 1, [d_make_root() is available])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.28 API change
+dnl # Added d_obtain_alias() helper function.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS], [
+	ZFS_LINUX_TEST_SRC([d_obtain_alias], [
+		#include <linux/dcache.h>
+	], [
+		d_obtain_alias(NULL);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_D_OBTAIN_ALIAS], [
+	AC_MSG_CHECKING([whether d_obtain_alias() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([d_obtain_alias],
+	    [d_obtain_alias], [fs/dcache.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1,
+		    [d_obtain_alias() is available])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.12 API change
+dnl # d_prune_aliases() helper function available.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES], [
+	ZFS_LINUX_TEST_SRC([d_prune_aliases], [
+		#include <linux/dcache.h>
+	], [
+		struct inode *ip = NULL;
+		d_prune_aliases(ip);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES], [
+	AC_MSG_CHECKING([whether d_prune_aliases() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([d_prune_aliases],
+	    [d_prune_aliases], [fs/dcache.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
+		    [d_prune_aliases() is available])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.38 API change
+dnl # Added d_set_d_op() helper function.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_D_SET_D_OP], [
+	ZFS_LINUX_TEST_SRC([d_set_d_op], [
+		#include <linux/dcache.h>
+	], [
+		d_set_d_op(NULL, NULL);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [
+	AC_MSG_CHECKING([whether d_set_d_op() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op],
+	    [d_set_d_op], [fs/dcache.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_D_SET_D_OP, 1, [d_set_d_op() is available])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
 dnl #
 dnl # 3.6 API change
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [
-	AC_MSG_CHECKING([whether dops->d_revalidate() takes struct nameidata])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA], [
+	ZFS_LINUX_TEST_SRC([dentry_operations_revalidate], [
 		#include <linux/dcache.h>
 		#include <linux/sched.h>
@@ -14,11 +108,15 @@ AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [
 		    dops __attribute__ ((unused)) = {
 			.d_revalidate = revalidate,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [
+	AC_MSG_CHECKING([whether dops->d_revalidate() takes struct nameidata])
+	ZFS_LINUX_TEST_RESULT([dentry_operations_revalidate], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_D_REVALIDATE_NAMEIDATA, 1,
-			[dops->d_revalidate() operation takes nameidata])
+		    [dops->d_revalidate() operation takes nameidata])
 	],[
 		AC_MSG_RESULT(no)
 	])
@@ -28,9 +126,8 @@ dnl #
 dnl # 2.6.30 API change
 dnl # The 'struct dentry_operations' was constified in the dentry structure.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [
-	AC_MSG_CHECKING([whether dentry uses const struct dentry_operations])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS], [
+	ZFS_LINUX_TEST_SRC([dentry_operations_const], [
 		#include <linux/dcache.h>

 		const struct dentry_operations test_d_op = {
@@ -38,51 +135,60 @@ AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [
 		};
 	],[
 		struct dentry d __attribute__ ((unused));
-
 		d.d_op = &test_d_op;
-	],[
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1,
-			[dentry uses const struct dentry_operations])
-	],[
-		AC_MSG_RESULT(no)
 	])
 ])

-dnl #
-dnl # 2.6.38 API change
-dnl # Added d_set_d_op() helper function.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP],
-	[AC_MSG_CHECKING([whether d_set_d_op() is available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
-		#include <linux/dcache.h>
-	], [
-		d_set_d_op(NULL, NULL);
-	], [d_set_d_op], [fs/dcache.c], [
+AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [
+	AC_MSG_CHECKING([whether dentry uses const struct dentry_operations])
+	ZFS_LINUX_TEST_RESULT([dentry_operations_const], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_D_SET_D_OP, 1,
-			[d_set_d_op() is available])
-	], [
+		AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1,
+		    [dentry uses const struct dentry_operations])
+	],[
 		AC_MSG_RESULT(no)
 	])
 ])

 dnl #
-dnl # 2.6.38 API chage
+dnl # 2.6.38 API change
 dnl # Added sb->s_d_op default dentry_operations member
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_S_D_OP],
-	[AC_MSG_CHECKING([whether super_block has s_d_op])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_S_D_OP], [
+	ZFS_LINUX_TEST_SRC([super_block_s_d_op], [
 		#include <linux/fs.h>
 	],[
 		struct super_block sb __attribute__ ((unused));
 		sb.s_d_op = NULL;
-	], [
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_S_D_OP], [
+	AC_MSG_CHECKING([whether super_block has s_d_op])
+	ZFS_LINUX_TEST_RESULT([super_block_s_d_op], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_S_D_OP, 1, [struct super_block has s_d_op])
 	], [
 		AC_MSG_RESULT(no)
 	])
 ])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [
+	ZFS_AC_KERNEL_SRC_D_MAKE_ROOT
+	ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS
+	ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES
+	ZFS_AC_KERNEL_SRC_D_SET_D_OP
+	ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA
+	ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS
+	ZFS_AC_KERNEL_SRC_S_D_OP
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [
+	ZFS_AC_KERNEL_D_MAKE_ROOT
+	ZFS_AC_KERNEL_D_OBTAIN_ALIAS
+	ZFS_AC_KERNEL_D_PRUNE_ALIASES
+	ZFS_AC_KERNEL_D_SET_D_OP
+	ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
+	ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
+	ZFS_AC_KERNEL_S_D_OP
+])
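The SRC_/result grouping pairs introduced here (ZFS_AC_KERNEL_SRC_DENTRY and ZFS_AC_KERNEL_DENTRY, and earlier the block_device_operations pair) exist so that one driver macro can register every conftest, compile them in a single batched pass, and only then read back results. The real top-level driver lives in config/kernel.m4 and is not shown in this patch; a hedged sketch of the assumed structure, with hypothetical driver names:

dnl # Sketch, not part of the patch: assumed shape of the top-level
dnl # driver that the grouped entry points plug into.
AC_DEFUN([ZFS_AC_KERNEL_ALL_SRC], [
	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS
	ZFS_AC_KERNEL_SRC_DENTRY
	dnl # ... one registration per compatibility check ...
])

AC_DEFUN([ZFS_AC_KERNEL_ALL_RESULTS], [
	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS
	ZFS_AC_KERNEL_DENTRY
	dnl # ... one result reader per compatibility check ...
])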
diff --git a/config/kernel-dirty-inode.m4 b/config/kernel-dirty-inode.m4
index ffd87bb146d6..dc7667fa4881 100644
--- a/config/kernel-dirty-inode.m4
+++ b/config/kernel-dirty-inode.m4
@@ -4,9 +4,8 @@ dnl # The sops->dirty_inode() callbacks were updated to take a flags
 dnl # argument.  This allows the greater control over whether the
 dnl # filesystem needs to push out a transaction or not.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS], [
-	AC_MSG_CHECKING([whether sops->dirty_inode() wants flags])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_DIRTY_INODE], [
+	ZFS_LINUX_TEST_SRC([dirty_inode_with_flags], [
 		#include <linux/fs.h>

 		void dirty_inode(struct inode *a, int b) { return; }
@@ -15,11 +14,15 @@ AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS], [
 		    sops __attribute__ ((unused)) = {
 			.dirty_inode = dirty_inode,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE], [
+	AC_MSG_CHECKING([whether sops->dirty_inode() wants flags])
+	ZFS_LINUX_TEST_RESULT([dirty_inode_with_flags], [
 		AC_MSG_RESULT([yes])
 		AC_DEFINE(HAVE_DIRTY_INODE_WITH_FLAGS, 1,
-			[sops->dirty_inode() wants flags])
+		    [sops->dirty_inode() wants flags])
 	],[
 		AC_MSG_RESULT([no])
 	])
diff --git a/config/kernel-discard-granularity.m4 b/config/kernel-discard-granularity.m4
index 2c677c909687..c830d9aa9fbc 100644
--- a/config/kernel-discard-granularity.m4
+++ b/config/kernel-discard-granularity.m4
@@ -2,18 +2,21 @@ dnl #
 dnl # 2.6.33 API change
 dnl # Discard granularity and alignment restrictions may now be set.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_DISCARD_GRANULARITY], [
-	AC_MSG_CHECKING([whether ql->discard_granularity is available])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY], [
+	ZFS_LINUX_TEST_SRC([discard_granularity], [
 		#include <linux/blkdev.h>
 	],[
 		struct queue_limits ql __attribute__ ((unused));
-
 		ql.discard_granularity = 0;
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_DISCARD_GRANULARITY], [
+	AC_MSG_CHECKING([whether ql->discard_granularity is available])
+	ZFS_LINUX_TEST_RESULT([discard_granularity], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_DISCARD_GRANULARITY, 1,
-			[ql->discard_granularity is available])
+		    [ql->discard_granularity is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
diff --git a/config/kernel-elevator-change.m4 b/config/kernel-elevator-change.m4
deleted file mode 100644
index eba252579bcc..000000000000
--- a/config/kernel-elevator-change.m4
+++ /dev/null
@@ -1,25 +0,0 @@
-dnl #
-dnl # 2.6.36 API, exported elevator_change() symbol
-dnl # 4.12 API, removed elevator_change() symbol
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [
-	AC_MSG_CHECKING([whether elevator_change() is available])
-	tmp_flags="$EXTRA_KCFLAGS"
-	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
-	ZFS_LINUX_TRY_COMPILE([
-		#include <linux/blkdev.h>
-		#include <linux/elevator.h>
-	],[
-		int ret;
-		struct request_queue *q = NULL;
-		char *elevator = NULL;
-		ret = elevator_change(q, elevator);
-	],[
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_ELEVATOR_CHANGE, 1,
-		    [elevator_change() is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-	EXTRA_KCFLAGS="$tmp_flags"
-])
diff --git a/config/kernel-encode-fh-inode.m4 b/config/kernel-encode-fh-inode.m4
index 287f62a5eda6..9d4ba5f0f61f 100644
--- a/config/kernel-encode-fh-inode.m4
+++ b/config/kernel-encode-fh-inode.m4
@@ -4,20 +4,23 @@ dnl # torvalds/linux@b0b0382bb4904965a9e9fca77ad87514dfda0d1c changed the
 dnl # ->encode_fh() callback to pass the child inode and its parents inode
 dnl # rather than a dentry and a boolean saying whether we want the parent.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE], [
-	AC_MSG_CHECKING([whether eops->encode_fh() wants inode])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE], [
+	ZFS_LINUX_TEST_SRC([export_operations_encode_fh], [
 		#include <linux/exportfs.h>

 		int encode_fh(struct inode *inode, __u32 *fh, int *max_len,
 		    struct inode *parent) { return 0; }
 		static struct export_operations eops __attribute__ ((unused))={
 			.encode_fh = encode_fh,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE], [
+	AC_MSG_CHECKING([whether eops->encode_fh() wants inode])
+	ZFS_LINUX_TEST_RESULT([export_operations_encode_fh], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_ENCODE_FH_WITH_INODE, 1,
-			[eops->encode_fh() wants child and parent inodes])
+		    [eops->encode_fh() wants child and parent inodes])
 	],[
 		AC_MSG_RESULT(no)
 	])
diff --git a/config/kernel-evict-inode.m4 b/config/kernel-evict-inode.m4
index 683cedb6d387..cd91c6669468 100644
--- a/config/kernel-evict-inode.m4
+++ b/config/kernel-evict-inode.m4
@@ -3,16 +3,19 @@ dnl # 2.6.36 API change
 dnl # The sops->delete_inode() and sops->clear_inode() callbacks have
 dnl # replaced by a single sops->evict_inode() callback.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_EVICT_INODE], [
-	AC_MSG_CHECKING([whether sops->evict_inode() exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_EVICT_INODE], [
+	ZFS_LINUX_TEST_SRC([evict_inode], [
 		#include <linux/fs.h>
 		void evict_inode (struct inode * t) { return; }
 		static struct super_operations sops __attribute__ ((unused)) = {
 			.evict_inode = evict_inode,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_EVICT_INODE], [
+	AC_MSG_CHECKING([whether sops->evict_inode() exists])
+	ZFS_LINUX_TEST_RESULT([evict_inode], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_EVICT_INODE, 1, [sops->evict_inode() exists])
 	],[
diff --git a/config/kernel-fallocate-pax.m4 b/config/kernel-fallocate-pax.m4
deleted file mode 100644
index e8948be176d1..000000000000
--- a/config/kernel-fallocate-pax.m4
+++ /dev/null
@@ -1,19 +0,0 @@
-dnl #
-dnl # PaX Linux 2.6.38 - 3.x API
-dnl #
-AC_DEFUN([ZFS_AC_PAX_KERNEL_FILE_FALLOCATE], [
-	AC_MSG_CHECKING([whether fops->fallocate() exists])
-	ZFS_LINUX_TRY_COMPILE([
-		#include <linux/fs.h>
-	],[
-		long (*fallocate) (struct file *, int, loff_t, loff_t) = NULL;
-		struct file_operations_no_const fops __attribute__ ((unused)) = {
-			.fallocate = fallocate,
-		};
-	],[
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
diff --git a/config/kernel-fallocate.m4 b/config/kernel-fallocate.m4
index 5509064725e0..302957a6c4ba 100644
--- a/config/kernel-fallocate.m4
+++ b/config/kernel-fallocate.m4
@@ -1,9 +1,11 @@
 dnl #
-dnl # Linux 2.6.38 - 3.x API
+dnl # The fallocate callback was moved from the inode_operations
+dnl # structure to the file_operations structure.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FILE_FALLOCATE], [
-	AC_MSG_CHECKING([whether fops->fallocate() exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [
+
+	dnl # Linux 2.6.38 - 3.x API
+	ZFS_LINUX_TEST_SRC([file_fallocate], [
 		#include <linux/fs.h>

 		long test_fallocate(struct file *file, int mode,
@@ -13,21 +15,10 @@ AC_DEFUN([ZFS_AC_KERNEL_FILE_FALLOCATE], [
 		    fops __attribute__ ((unused)) = {
 			.fallocate = test_fallocate,
 		};
-	],[
-	],[
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
+	], [])

-dnl #
-dnl # Linux 2.6.x - 2.6.37 API
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_INODE_FALLOCATE], [
-	AC_MSG_CHECKING([whether iops->fallocate() exists])
-	ZFS_LINUX_TRY_COMPILE([
+	dnl # Linux 2.6.x - 2.6.37 API
+	ZFS_LINUX_TEST_SRC([inode_fallocate], [
 		#include <linux/fs.h>

 		long test_fallocate(struct inode *inode, int mode,
@@ -37,20 +28,23 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_FALLOCATE], [
 		    fops __attribute__ ((unused)) = {
 			.fallocate = test_fallocate,
 		};
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [
+	AC_MSG_CHECKING([whether fops->fallocate() exists])
+	ZFS_LINUX_TEST_RESULT([file_fallocate], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
 	],[
-	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether iops->fallocate() exists])
+	ZFS_LINUX_TEST_RESULT([inode_fallocate], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_INODE_FALLOCATE, 1, [fops->fallocate() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
-
-dnl #
-dnl # The fallocate callback was moved from the inode_operations
-dnl # structure to the file_operations structure.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [
-	ZFS_AC_KERNEL_FILE_FALLOCATE
-	ZFS_AC_KERNEL_INODE_FALLOCATE
-])
diff --git a/config/kernel-file-dentry.m4 b/config/kernel-file-dentry.m4
index daf742ee1b0a..9cb5869c3821 100644
--- a/config/kernel-file-dentry.m4
+++ b/config/kernel-file-dentry.m4
@@ -4,14 +4,18 @@ dnl # struct access file->f_path.dentry was replaced by accessor function
 dnl # since fix torvalds/linux@4bacc9c9234c ("overlayfs: Make f_path always
 dnl # point to the overlay and f_inode to the underlay").
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [
-	AC_MSG_CHECKING([whether file_dentry() is available])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_DENTRY], [
+	ZFS_LINUX_TEST_SRC([file_dentry], [
 		#include <linux/fs.h>
 	],[
 		struct file *f = NULL;
 		file_dentry(f);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [
+	AC_MSG_CHECKING([whether file_dentry() is available])
+	ZFS_LINUX_TEST_RESULT([file_dentry], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_FILE_DENTRY, 1, [file_dentry() is available])
 	],[
diff --git a/config/kernel-file-inode.m4 b/config/kernel-file-inode.m4
index 300188fa3a64..00a3621657ad 100644
--- a/config/kernel-file-inode.m4
+++ b/config/kernel-file-inode.m4
@@ -3,14 +3,18 @@ dnl # 3.19 API change
 dnl # struct access f->f_dentry->d_inode was replaced by accessor function
 dnl # file_inode(f)
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FILE_INODE], [
-	AC_MSG_CHECKING([whether file_inode() is available])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_INODE], [
+	ZFS_LINUX_TEST_SRC([file_inode], [
 		#include <linux/fs.h>
 	],[
 		struct file *f = NULL;
 		file_inode(f);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILE_INODE], [
+	AC_MSG_CHECKING([whether file_inode() is available])
+	ZFS_LINUX_TEST_RESULT([file_inode], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_FILE_INODE, 1, [file_inode() is available])
 	],[
diff --git a/config/kernel-fmode-t.m4 b/config/kernel-fmode-t.m4
index 4a23c391d326..bc0001b9ebcc 100644
--- a/config/kernel-fmode-t.m4
+++ b/config/kernel-fmode-t.m4
@@ -2,16 +2,19 @@ dnl #
 dnl # 2.6.28 API change,
 dnl # check if fmode_t typedef is defined
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_TYPE_FMODE_T],
-	[AC_MSG_CHECKING([whether kernel defines fmode_t])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FMODE_T], [
+	ZFS_LINUX_TEST_SRC([type_fmode_t], [
 		#include <linux/fs.h>
 	],[
 		fmode_t *ptr __attribute__ ((unused));
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FMODE_T], [
+	AC_MSG_CHECKING([whether kernel defines fmode_t])
+	ZFS_LINUX_TEST_RESULT([type_fmode_t], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_FMODE_T, 1,
-			[kernel defines fmode_t])
+		AC_DEFINE(HAVE_FMODE_T, 1, [kernel defines fmode_t])
 	],[
 		AC_MSG_RESULT([no])
 	])
diff --git a/config/kernel-follow-down-one.m4 b/config/kernel-follow-down-one.m4
index 63fa779d8571..94e4aeb8d470 100644
--- a/config/kernel-follow-down-one.m4
+++ b/config/kernel-follow-down-one.m4
@@ -3,14 +3,18 @@ dnl # 2.6.38 API change
 dnl # follow_down() renamed follow_down_one().  The original follow_down()
 dnl # symbol still exists but will traverse down all the layers.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_DOWN_ONE], [
-	AC_MSG_CHECKING([whether follow_down_one() is available])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE], [
+	ZFS_LINUX_TEST_SRC([follow_down_one], [
 		#include <linux/namei.h>
 	],[
 		struct path *p = NULL;
 		follow_down_one(p);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_DOWN_ONE], [
+	AC_MSG_CHECKING([whether follow_down_one() is available])
+	ZFS_LINUX_TEST_RESULT([follow_down_one], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_FOLLOW_DOWN_ONE, 1,
 		    [follow_down_one() is available])
diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
index 5fff79a74c70..3c7933413d18 100644
--- a/config/kernel-fpu.m4
+++ b/config/kernel-fpu.m4
@@ -2,8 +2,9 @@ dnl #
 dnl # Handle differences in kernel FPU code.
 dnl #
 dnl # Kernel
-dnl # 5.0: All kernel fpu functions are GPL only, so we can't use them.
-dnl #	(nothing defined)
+dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
+dnl #	This change was made to the 4.19.38 and 4.14.120 LTS kernels.
+dnl #	HAVE_KERNEL_FPU_INTERNAL
 dnl #
 dnl # 4.2: Use __kernel_fpu_{begin,end}()
 dnl #	HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
@@ -11,55 +12,120 @@ dnl #
 dnl # Pre-4.2: Use kernel_fpu_{begin,end}()
 dnl #	HAVE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FPU], [
-	AC_MSG_CHECKING([which kernel_fpu header to use])
+dnl # N.B. The header check is performed before all other checks since it
+dnl # depends on HAVE_KERNEL_FPU_API_HEADER being set in confdefs.h.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_FPU_HEADER], [
+	AC_MSG_CHECKING([whether fpu headers are available])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/module.h>
 		#include <asm/fpu/api.h>
 	],[
 	],[
-		AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, [kernel has asm/fpu/api.h])
+		AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1,
+		    [kernel has asm/fpu/api.h])
 		AC_MSG_RESULT(asm/fpu/api.h)
 	],[
 		AC_MSG_RESULT(i387.h & xcr.h)
 	])
+])

-	AC_MSG_CHECKING([which kernel_fpu function to use])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
-		#include <linux/module.h>
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
+	ZFS_LINUX_TEST_SRC([kernel_fpu], [
+		#include <linux/types.h>
 		#ifdef HAVE_KERNEL_FPU_API_HEADER
 		#include <asm/fpu/api.h>
 		#else
 		#include <asm/i387.h>
 		#include <asm/xcr.h>
 		#endif
-		MODULE_LICENSE("$ZFS_META_LICENSE");
-	],[
+	], [
 		kernel_fpu_begin();
 		kernel_fpu_end();
-	], [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [
+	], [], [$ZFS_META_LICENSE])
+
+	ZFS_LINUX_TEST_SRC([__kernel_fpu], [
+		#include <linux/types.h>
+		#ifdef HAVE_KERNEL_FPU_API_HEADER
+		#include <asm/fpu/api.h>
+		#else
+		#include <asm/i387.h>
+		#include <asm/xcr.h>
+		#endif
+	], [
+		__kernel_fpu_begin();
+		__kernel_fpu_end();
+	], [], [$ZFS_META_LICENSE])
+
+	ZFS_LINUX_TEST_SRC([fpu_internal], [
+		#if defined(__x86_64) || defined(__x86_64__) || \
+		    defined(__i386) || defined(__i386__)
+		#if !defined(__x86)
+		#define __x86
+		#endif
+		#endif
+
+		#if !defined(__x86)
+		#error Unsupported architecture
+		#endif
+
+		#include <linux/types.h>
+		#ifdef HAVE_KERNEL_FPU_API_HEADER
+		#include <asm/fpu/api.h>
+		#include <asm/fpu/internal.h>
+		#else
+		#include <asm/i387.h>
+		#include <asm/xcr.h>
+		#endif
+
+		#if !defined(XSTATE_XSAVE)
+		#error XSTATE_XSAVE not defined
+		#endif
+
+		#if !defined(XSTATE_XRESTORE)
+		#error XSTATE_XRESTORE not defined
+		#endif
+	],[
+		struct fpu *fpu = &current->thread.fpu;
+		union fpregs_state *st = &fpu->state;
+		struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
+		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
+		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FPU], [
+	dnl #
+	dnl # Legacy kernel
+	dnl #
+	AC_MSG_CHECKING([whether kernel fpu is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([kernel_fpu_license],
+	    [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [
 		AC_MSG_RESULT(kernel_fpu_*)
-		AC_DEFINE(HAVE_KERNEL_FPU, 1, [kernel has kernel_fpu_* functions])
-		AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions])
+		AC_DEFINE(HAVE_KERNEL_FPU, 1,
+		    [kernel has kernel_fpu_* functions])
+		AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
+		    [kernel exports FPU functions])
 	],[
-		ZFS_LINUX_TRY_COMPILE_SYMBOL([
-			#include <linux/module.h>
-			#ifdef HAVE_KERNEL_FPU_API_HEADER
-			#include <asm/fpu/api.h>
-			#else
-			#include <asm/i387.h>
-			#include <asm/xcr.h>
-			#endif
-			MODULE_LICENSE("$ZFS_META_LICENSE");
-		],[
-			__kernel_fpu_begin();
-			__kernel_fpu_end();
-		], [__kernel_fpu_begin], [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [
+		dnl #
+		dnl # Linux 4.2 kernel
+		dnl #
+		ZFS_LINUX_TEST_RESULT_SYMBOL([__kernel_fpu_license],
+		    [__kernel_fpu_begin],
+		    [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [
 			AC_MSG_RESULT(__kernel_fpu_*)
-			AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1, [kernel has __kernel_fpu_* functions])
-			AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions])
+			AC_DEFINE(HAVE_UNDERSCORE_KERNEL_FPU, 1,
+			    [kernel has __kernel_fpu_* functions])
+			AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
+			    [kernel exports FPU functions])
 		],[
-			AC_MSG_RESULT(not exported)
+			ZFS_LINUX_TEST_RESULT([fpu_internal], [
+				AC_MSG_RESULT(internal)
+				AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
+				    [kernel fpu internal])
+			],[
+				AC_MSG_RESULT(unavailable)
+			])
 		])
 	])
 ])
diff --git a/config/kernel-fst-mount.m4 b/config/kernel-fst-mount.m4
index a8ac50bdd5d9..cec1ed4d6cdf 100644
--- a/config/kernel-fst-mount.m4
+++ b/config/kernel-fst-mount.m4
@@ -3,9 +3,8 @@ dnl # 2.6.38 API change
 dnl # The .get_sb callback has been replaced by a .mount callback
 dnl # in the file_system_type structure.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FST_MOUNT], [
-	AC_MSG_CHECKING([whether fst->mount() exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FST_MOUNT], [
+	ZFS_LINUX_TEST_SRC([file_system_type_mount], [
 		#include <linux/fs.h>

 		static struct dentry *
@@ -18,8 +17,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FST_MOUNT], [
 		static struct file_system_type fst __attribute__ ((unused)) = {
 			.mount = mount,
 		};
-	],[
-	],[
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FST_MOUNT], [
+	AC_MSG_CHECKING([whether fst->mount() exists])
+	ZFS_LINUX_TEST_RESULT([file_system_type_mount], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_FST_MOUNT, 1, [fst->mount() exists])
 	],[
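In the FPU conversion above, the open-coded MODULE_LICENSE() line disappears from the test source and $ZFS_META_LICENSE is passed as a trailing argument instead. Inferring from the call sites in this patch only (the framework's definition is outside this excerpt), ZFS_LINUX_TEST_SRC appears to accept up to five arguments; a hedged sketch of the apparent calling convention:

dnl # Sketch, not part of the patch: apparent ZFS_LINUX_TEST_SRC
dnl # argument roles, deduced from usage in this patch.
dnl #   $1  test name, later passed to ZFS_LINUX_TEST_RESULT*
dnl #   $2  prologue (headers, helper functions)
dnl #   $3  body of the generated conftest entry point
dnl #   $4  optional extra compiler flags, e.g. $NO_UNUSED_BUT_SET_VARIABLE
dnl #   $5  optional module license, e.g. $ZFS_META_LICENSE, for probes
dnl #       whose outcome depends on GPL-only symbol visibility
ZFS_LINUX_TEST_SRC([example], [
	#include <linux/mutex.h>
], [
	struct mutex lock;

	mutex_init(&lock);
	mutex_lock(&lock);
	mutex_unlock(&lock);
], [], [$ZFS_META_LICENSE])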
diff --git a/config/kernel-fsync.m4 b/config/kernel-fsync.m4
index e1f2d68b9b1f..0494e31ad11a 100644
--- a/config/kernel-fsync.m4
+++ b/config/kernel-fsync.m4
@@ -1,8 +1,8 @@
 dnl #
-dnl # Linux 2.6.x - 2.6.34 API
+dnl # Check file_operations->fsync interface.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITH_DENTRY], [
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [
+	ZFS_LINUX_TEST_SRC([fsync_with_dentry], [
 		#include <linux/fs.h>

 		int test_fsync(struct file *f, struct dentry *dentry, int x)
@@ -12,20 +12,9 @@ AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITH_DENTRY], [
 		    fops __attribute__ ((unused)) = {
 			.fsync = test_fsync,
 		};
-	],[
-	],[
-		AC_MSG_RESULT([dentry])
-		AC_DEFINE(HAVE_FSYNC_WITH_DENTRY, 1,
-			[fops->fsync() with dentry])
-	],[
-	])
-])
+	],[])

-dnl #
-dnl # Linux 2.6.35 - Linux 3.0 API
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITHOUT_DENTRY], [
-	ZFS_LINUX_TRY_COMPILE([
+	ZFS_LINUX_TEST_SRC([fsync_without_dentry], [
 		#include <linux/fs.h>

 		int test_fsync(struct file *f, int x) { return 0; }
@@ -34,20 +23,9 @@ AC_DEFUN([ZFS_AC_KERNEL_FSYNC_WITHOUT_DENTRY], [
 		    fops __attribute__ ((unused)) = {
 			.fsync = test_fsync,
 		};
-	],[
-	],[
-		AC_MSG_RESULT([no dentry])
-		AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1,
-			[fops->fsync() without dentry])
-	],[
-	])
-])
+	],[])

-dnl #
-dnl # Linux 3.1 - 3.x API
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FSYNC_RANGE], [
-	ZFS_LINUX_TRY_COMPILE([
+	ZFS_LINUX_TEST_SRC([fsync_range], [
 		#include <linux/fs.h>

 		int test_fsync(struct file *f, loff_t a, loff_t b, int c)
@@ -57,18 +35,43 @@ AC_DEFUN([ZFS_AC_KERNEL_FSYNC_RANGE], [
 		    fops __attribute__ ((unused)) = {
 			.fsync = test_fsync,
 		};
-	],[
-	],[
-		AC_MSG_RESULT([range])
-		AC_DEFINE(HAVE_FSYNC_RANGE, 1,
-			[fops->fsync() with range])
-	],[
-	])
+	],[])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_FSYNC], [
-	AC_MSG_CHECKING([whether fops->fsync() wants])
-	ZFS_AC_KERNEL_FSYNC_WITH_DENTRY
-	ZFS_AC_KERNEL_FSYNC_WITHOUT_DENTRY
-	ZFS_AC_KERNEL_FSYNC_RANGE
+	dnl #
+	dnl # Linux 2.6.x - 2.6.34 API
+	dnl #
+	AC_MSG_CHECKING([whether fops->fsync() wants dentry])
+	ZFS_LINUX_TEST_RESULT([fsync_with_dentry], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_FSYNC_WITH_DENTRY, 1,
+		    [fops->fsync() with dentry])
+	],[
+		AC_MSG_RESULT([no])
+
+		dnl #
+		dnl # Linux 2.6.35 - Linux 3.0 API
+		dnl #
+		AC_MSG_CHECKING([whether fops->fsync() wants no dentry])
+		ZFS_LINUX_TEST_RESULT([fsync_without_dentry], [
+			AC_MSG_RESULT([yes])
+			AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1,
+			    [fops->fsync() without dentry])
+		],[
+			AC_MSG_RESULT([no])
+
+			dnl #
+			dnl # Linux 3.1 - 3.x API
+			dnl #
+			AC_MSG_CHECKING([whether fops->fsync() wants range])
+			ZFS_LINUX_TEST_RESULT([fsync_range], [
+				AC_MSG_RESULT([range])
+				AC_DEFINE(HAVE_FSYNC_RANGE, 1,
+				    [fops->fsync() with range])
+			],[
+				ZFS_LINUX_TEST_ERROR([fops->fsync])
+			])
+		])
+	])
 ])
diff --git a/config/kernel-generic_io_acct.m4 b/config/kernel-generic_io_acct.m4
index 0aa76216226c..423b3e5a3521 100644
--- a/config/kernel-generic_io_acct.m4
+++ b/config/kernel-generic_io_acct.m4
@@ -1,12 +1,8 @@
 dnl #
-dnl # 3.19 API addition
+dnl # Check for generic io accounting interface.
 dnl #
-dnl # torvalds/linux@394ffa503bc40e32d7f54a9b817264e81ce131b4 allows us to
-dnl # increment iostat counters without generic_make_request().
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG], [
-	AC_MSG_CHECKING([whether 3 arg generic IO accounting symbols are available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [
+	ZFS_LINUX_TEST_SRC([generic_acct_3args], [
 		#include <linux/bio.h>

 		void (*generic_start_io_acct_f)(int, unsigned long,
@@ -16,24 +12,9 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG], [
 	], [
 		generic_start_io_acct(0, 0, NULL);
 		generic_end_io_acct(0, NULL, 0);
-	], [generic_start_io_acct], [block/bio.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
-			[generic_start_io_acct()/generic_end_io_acct() available])
-	], [
-		AC_MSG_RESULT(no)
 	])
-])

-dnl #
-dnl # Linux 4.14 API,
-dnl #
-dnl # generic_start_io_acct/generic_end_io_acct now require request_queue to be
-dnl # provided. No functional changes, but preparation for inflight accounting
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG], [
-	AC_MSG_CHECKING([whether 4 arg generic IO accounting symbols are available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
+	ZFS_LINUX_TEST_SRC([generic_acct_4args], [
 		#include <linux/bio.h>

 		void (*generic_start_io_acct_f)(struct request_queue *, int,
@@ -43,11 +24,41 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG], [
 	], [
 		generic_start_io_acct(NULL, 0, 0, NULL);
 		generic_end_io_acct(NULL, 0, NULL, 0);
-	], [generic_start_io_acct], [block/bio.c], [
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [
+	dnl #
+	dnl # 3.19 API addition
+	dnl #
+	dnl # torvalds/linux@394ffa50 allows us to increment iostat
+	dnl # counters without generic_make_request().
+	dnl #
+	AC_MSG_CHECKING([whether generic IO accounting wants 3 args])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
+	    [generic_start_io_acct], [block/bio.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
-			[generic_start_io_acct()/generic_end_io_acct() 4 arg available])
+		AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
+		    [generic_start_io_acct()/generic_end_io_acct() available])
 	], [
 		AC_MSG_RESULT(no)
+
+		dnl #
+		dnl # Linux 4.14 API,
+		dnl #
+		dnl # generic_start_io_acct/generic_end_io_acct now require
+		dnl # request_queue to be provided. No functional changes,
+		dnl # but preparation for inflight accounting.
+		dnl #
+		AC_MSG_CHECKING([whether generic IO accounting wants 4 args])
+		ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args],
+		    [generic_start_io_acct], [block/bio.c], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
+			    [generic_start_io_acct()/generic_end_io_acct() ]
+			    [4 arg available])
+		], [
+			AC_MSG_RESULT(no)
+		])
 	])
 ])
diff --git a/config/kernel-generic_readlink.m4 b/config/kernel-generic_readlink.m4
index 914431de4fdc..a7a33b408abd 100644
--- a/config/kernel-generic_readlink.m4
+++ b/config/kernel-generic_readlink.m4
@@ -4,18 +4,21 @@ dnl #
 dnl # NULL inode_operations.readlink implies generic_readlink(), which
 dnl # has been made static.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL], [
-	AC_MSG_CHECKING([whether generic_readlink is global])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL], [
+	ZFS_LINUX_TEST_SRC([generic_readlink_global], [
 		#include <linux/fs.h>
 	],[
 		int i __attribute__ ((unused));
-
 		i = generic_readlink(NULL, NULL, 0);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL], [
+	AC_MSG_CHECKING([whether generic_readlink is global])
+	ZFS_LINUX_TEST_RESULT([generic_readlink_global], [
 		AC_MSG_RESULT([yes])
 		AC_DEFINE(HAVE_GENERIC_READLINK, 1,
-			[generic_readlink is global])
+		    [generic_readlink is global])
 	],[
 		AC_MSG_RESULT([no])
 	])
diff --git a/config/kernel-get-disk-and-module.m4 b/config/kernel-get-disk-and-module.m4
index 2a51a5af7dc1..51cf7743cf0b 100644
--- a/config/kernel-get-disk-and-module.m4
+++ b/config/kernel-get-disk-and-module.m4
@@ -2,14 +2,19 @@ dnl #
 dnl # 4.16 API change
 dnl # Verify if get_disk_and_module() symbol is available.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_AND_MODULE],
-	[AC_MSG_CHECKING([whether get_disk_and_module() is available])
-	ZFS_LINUX_TRY_COMPILE_SYMBOL([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE], [
+	ZFS_LINUX_TEST_SRC([get_disk_and_module], [
 		#include <linux/genhd.h>
 	], [
 		struct gendisk *disk = NULL;
 		(void) get_disk_and_module(disk);
-	], [get_disk_and_module], [block/genhd.c], [
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_AND_MODULE], [
+	AC_MSG_CHECKING([whether get_disk_and_module() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([get_disk_and_module],
+	    [get_disk_and_module], [block/genhd.c], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_GET_DISK_AND_MODULE, 1,
 		    [get_disk_and_module() is available])
diff --git a/config/kernel-get-disk-ro.m4 b/config/kernel-get-disk-ro.m4
index 13ed81217ee3..1e2abb475f9a 100644
--- a/config/kernel-get-disk-ro.m4
+++ b/config/kernel-get-disk-ro.m4
@@ -1,21 +1,21 @@
 dnl #
 dnl # 2.6.x API change
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_RO], [
-	AC_MSG_CHECKING([whether get_disk_ro() is available])
-	tmp_flags="$EXTRA_KCFLAGS"
-	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_RO], [
+	ZFS_LINUX_TEST_SRC([get_disk_ro], [
 		#include <linux/genhd.h>
 	],[
 		struct gendisk *disk = NULL;
 		(void) get_disk_ro(disk);
-	],[
+	], [$NO_UNUSED_BUT_SET_VARIABLE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_RO], [
+	AC_MSG_CHECKING([whether get_disk_ro() is available])
+	ZFS_LINUX_TEST_RESULT([get_disk_ro], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GET_DISK_RO, 1,
-			[blk_disk_ro() is available])
+		AC_DEFINE(HAVE_GET_DISK_RO, 1, [blk_disk_ro() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
-	EXTRA_KCFLAGS="$tmp_flags"
 ])
diff --git a/config/kernel-get-link.m4 b/config/kernel-get-link.m4
index 3cda08c1b4d5..e4f478e37c18 100644
--- a/config/kernel-get-link.m4
+++ b/config/kernel-get-link.m4
@@ -1,13 +1,29 @@
 dnl #
 dnl # Supported get_link() interfaces checked newest to oldest.
+dnl # Note this interface used to be named follow_link.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_LINK], [
-	dnl #
-	dnl # 4.2 API change
-	dnl # - This kernel retired the nameidata structure.
-	dnl #
-	AC_MSG_CHECKING([whether iops->follow_link() passes cookie])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [
+	ZFS_LINUX_TEST_SRC([inode_operations_get_link], [
+		#include <linux/fs.h>
+		const char *get_link(struct dentry *de, struct inode *ip,
+		    struct delayed_call *done) { return "symlink"; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.get_link = get_link,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_get_link_cookie], [
+		#include <linux/fs.h>
+		const char *get_link(struct dentry *de, struct
+		    inode *ip, void **cookie) { return "symlink"; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.get_link = get_link,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_follow_link], [
 		#include <linux/fs.h>
 		const char *follow_link(struct dentry *de,
 		    void **cookie) { return "symlink"; }
@@ -15,35 +31,17 @@ AC_DEFUN([ZFS_AC_KERNEL_FOLLOW_LINK], [
 		    iops __attribute__ ((unused)) = {
 			.follow_link = follow_link,
 		};
-	],[
-	],[
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FOLLOW_LINK_COOKIE, 1,
-		    [iops->follow_link() cookie])
-	],[
-		dnl #
-		dnl # 2.6.32 API
-		dnl #
-		AC_MSG_RESULT(no)
-		AC_MSG_CHECKING(
-		    [whether iops->follow_link() passes nameidata])
-		ZFS_LINUX_TRY_COMPILE([
+	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_follow_link_nameidata], [
 			#include <linux/fs.h>
-			void *follow_link(struct dentry *de, struct
-			    nameidata *nd) { return (void *)NULL; }
-			static struct inode_operations
-			    iops __attribute__ ((unused)) = {
-				.follow_link = follow_link,
-			};
-		],[
-		],[
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_FOLLOW_LINK_NAMEIDATA, 1,
-			    [iops->follow_link() nameidata])
-		],[
-			AC_MSG_ERROR(no; please file a bug report)
-		])
-	])
+		void *follow_link(struct dentry *de, struct
+		    nameidata *nd) { return (void *)NULL; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.follow_link = follow_link,
+		};
+	],[])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [
@@ -53,20 +51,12 @@ AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [
 	dnl # used it to retire the put_link() interface.
 	dnl #
 	AC_MSG_CHECKING([whether iops->get_link() passes delayed])
-	ZFS_LINUX_TRY_COMPILE([
-		#include <linux/fs.h>
-		const char *get_link(struct dentry *de, struct inode *ip,
-		    struct delayed_call *done) { return "symlink"; }
-		static struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.get_link = get_link,
-		};
-	],[
-	],[
+	ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GET_LINK_DELAYED, 1,
-			[iops->get_link() delayed])
+		AC_DEFINE(HAVE_GET_LINK_DELAYED, 1, [iops->get_link() delayed])
 	],[
+		AC_MSG_RESULT(no)
+
 		dnl #
 		dnl # 4.5 API change
 		dnl # The follow_link() interface has been replaced by
@@ -74,27 +64,41 @@ AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [
 		dnl # - An inode is passed as a separate argument
 		dnl # - When called in RCU mode a NULL dentry is passed.
 		dnl #
-		AC_MSG_RESULT(no)
 		AC_MSG_CHECKING([whether iops->get_link() passes cookie])
-		ZFS_LINUX_TRY_COMPILE([
-			#include <linux/fs.h>
-			const char *get_link(struct dentry *de, struct
-			    inode *ip, void **cookie) { return "symlink"; }
-			static struct inode_operations
-			    iops __attribute__ ((unused)) = {
-				.get_link = get_link,
-			};
-		],[
-		],[
+		ZFS_LINUX_TEST_RESULT([inode_operations_get_link_cookie], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_GET_LINK_COOKIE, 1,
 			    [iops->get_link() cookie])
 		],[
+			AC_MSG_RESULT(no)
+
 			dnl #
-			dnl # Check for the follow_link APIs.
+			dnl # 4.2 API change
+			dnl # This kernel retired the nameidata structure.
 			dnl #
-			AC_MSG_RESULT(no)
-			ZFS_AC_KERNEL_FOLLOW_LINK
+			AC_MSG_CHECKING(
+			    [whether iops->follow_link() passes cookie])
+			ZFS_LINUX_TEST_RESULT([inode_operations_follow_link], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_FOLLOW_LINK_COOKIE, 1,
+				    [iops->follow_link() cookie])
+			],[
+				AC_MSG_RESULT(no)
+
+				dnl #
+				dnl # 2.6.32 API
+				dnl #
+				AC_MSG_CHECKING(
+				    [whether iops->follow_link() passes nameidata])
+				ZFS_LINUX_TEST_RESULT(
+				    [inode_operations_follow_link_nameidata],[
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_FOLLOW_LINK_NAMEIDATA, 1,
+					    [iops->follow_link() nameidata])
+				],[
+					ZFS_LINUX_TEST_ERROR([get_link])
+				])
+			])
 		])
 	])
 ])
diff --git a/config/kernel-global_page_state.m4 b/config/kernel-global_page_state.m4
index f4a40011f6f8..a0cb9e2c8270 100644
--- a/config/kernel-global_page_state.m4
+++ b/config/kernel-global_page_state.m4
@@ -4,16 +4,21 @@ dnl #
 dnl # 75ef71840539 mm, vmstat: add infrastructure for per-node vmstats
 dnl # 599d0c954f91 mm, vmscan: move LRU lists to node
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE], [
-	AC_MSG_CHECKING([whether global_node_page_state() exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE], [
+	ZFS_LINUX_TEST_SRC([global_node_page_state], [
 		#include <linux/mm.h>
 		#include <linux/vmstat.h>
 	],[
 		(void) global_node_page_state(0);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE], [
+	AC_MSG_CHECKING([whether global_node_page_state() exists])
+	ZFS_LINUX_TEST_RESULT([global_node_page_state], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1, [global_node_page_state() exists])
+		AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1,
+		    [global_node_page_state() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
@@ -24,16 +29,21 @@ dnl # 4.14 API change
 dnl #
 dnl # c41f012ade0b mm: rename global_page_state to global_zone_page_state
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE], [
-	AC_MSG_CHECKING([whether global_zone_page_state() exists])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE], [
+	ZFS_LINUX_TEST_SRC([global_zone_page_state], [
 		#include <linux/mm.h>
 		#include <linux/vmstat.h>
 	],[
 		(void) global_zone_page_state(0);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE], [
+	AC_MSG_CHECKING([whether global_zone_page_state() exists])
+	ZFS_LINUX_TEST_RESULT([global_zone_page_state], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(ZFS_GLOBAL_ZONE_PAGE_STATE, 1, [global_zone_page_state() exists])
+		AC_DEFINE(ZFS_GLOBAL_ZONE_PAGE_STATE, 1,
+		    [global_zone_page_state() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
@@ -44,9 +54,11 @@ dnl # Create a define and autoconf variable for an enum member
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_ENUM_MEMBER], [
 	AC_MSG_CHECKING([whether enum $2 contains $1])
-	AS_IF([AC_TRY_COMMAND("${srcdir}/scripts/enum-extract.pl" "$2" "$3" | egrep -qx $1)],[
+	AS_IF([AC_TRY_COMMAND(
+	    "${srcdir}/scripts/enum-extract.pl" "$2" "$3" | egrep -qx $1)],[
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1, [enum $2 contains $1])
+		AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1,
+		    [enum $2 contains $1])
 		m4_join([_], [ZFS_ENUM], m4_toupper($2), $1)=1
 	],[
 		AC_MSG_RESULT([no])
@@ -59,8 +71,7 @@ dnl #
 AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR],[
 	AC_MSG_RESULT(no)
 	AC_MSG_RESULT([$1 in either node_stat_item or zone_stat_item: $2])
-	AC_MSG_RESULT([configure needs updating, see: config/kernel-global_page_state.m4])
-	AC_MSG_FAILURE([SHUT 'ER DOWN CLANCY, SHE'S PUMPIN' MUD!])
+	ZFS_LINUX_TEST_ERROR([global page state])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [
@@ -75,10 +86,10 @@ AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [
AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [ ]) dnl # -dnl # Ensure the config tests are finding one and only one of each enum of interest +dnl # Ensure the config tests are finding one and only one of each enum. dnl # AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY], [ - AC_MSG_CHECKING([global_page_state enums are sane]) + AC_MSG_CHECKING([whether global_page_state enums are sane]) ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_FILE_PAGES]) ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_INACTIVE_ANON]) @@ -88,6 +99,11 @@ AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY], [ AC_MSG_RESULT(yes) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE], [ + ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE + ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE +]) + dnl # dnl # enum members in which we're interested dnl # @@ -95,15 +111,23 @@ AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE], [ ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], + [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) + ZFS_AC_KERNEL_ENUM_MEMBER([NR_SLAB_RECLAIMABLE], + [zone_stat_item], [$LINUX/include/linux/mmzone.h]) ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY ]) diff --git a/config/kernel-group-info.m4 b/config/kernel-group-info.m4 index 849a1e246a4a..0fee1d36d50d 100644 --- a/config/kernel-group-info.m4 +++ b/config/kernel-group-info.m4 @@ -2,20 +2,21 @@ dnl # dnl # 4.9 API change dnl # group_info changed from 2d array via >blocks to 1d array via ->gid dnl # -AC_DEFUN([ZFS_AC_KERNEL_GROUP_INFO_GID], [ - AC_MSG_CHECKING([whether group_info->gid exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_GROUP_INFO_GID], [ + ZFS_LINUX_TEST_SRC([group_info_gid], [ #include ],[ struct group_info *gi = groups_alloc(1); gi->gid[0] = KGIDT_INIT(0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GROUP_INFO_GID], [ + AC_MSG_CHECKING([whether group_info->gid exists]) + ZFS_LINUX_TEST_RESULT([group_info_gid], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists]) ],[ AC_MSG_RESULT(no) ]) - 
EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-in-compat-syscall.m4 b/config/kernel-in-compat-syscall.m4 index 9fca9da20ea0..baaac8c4fda2 100644 --- a/config/kernel-in-compat-syscall.m4 +++ b/config/kernel-in-compat-syscall.m4 @@ -4,13 +4,17 @@ dnl # Added in_compat_syscall() which can be overridden on a per- dnl # architecture basis. Prior to this is_compat_task() was the dnl # provided interface. dnl # -AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ - AC_MSG_CHECKING([whether in_compat_syscall() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL], [ + ZFS_LINUX_TEST_SRC([in_compat_syscall], [ #include ],[ in_compat_syscall(); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ + AC_MSG_CHECKING([whether in_compat_syscall() is available]) + ZFS_LINUX_TEST_RESULT([in_compat_syscall], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IN_COMPAT_SYSCALL, 1, [in_compat_syscall() is available]) diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4 index f10e0b251085..48391d66f8bd 100644 --- a/config/kernel-inode-getattr.m4 +++ b/config/kernel-inode-getattr.m4 @@ -2,9 +2,8 @@ dnl # dnl # Linux 4.11 API dnl # See torvalds/linux@a528d35 dnl # -AC_DEFUN([ZFS_AC_PATH_KERNEL_IOPS_GETATTR], [ - AC_MSG_CHECKING([whether iops->getattr() takes a path]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ + ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [ #include int test_getattr( @@ -16,24 +15,9 @@ AC_DEFUN([ZFS_AC_PATH_KERNEL_IOPS_GETATTR], [ iops __attribute__ ((unused)) = { .getattr = test_getattr, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1, - [iops->getattr() takes a path]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - + ],[]) - -dnl # -dnl # Linux 3.9 - 4.10 API -dnl # -AC_DEFUN([ZFS_AC_VFSMOUNT_KERNEL_IOPS_GETATTR], [ - AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([inode_operations_getattr_vfsmount], [ #include int test_getattr( @@ -45,23 +29,25 @@ AC_DEFUN([ZFS_AC_VFSMOUNT_KERNEL_IOPS_GETATTR], [ iops __attribute__ ((unused)) = { .getattr = test_getattr, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ + AC_MSG_CHECKING([whether iops->getattr() takes a path]) + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, - [iops->getattr() takes a vfsmount]) + AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1, + [iops->getattr() takes a path]) ],[ AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # The interface of the getattr callback from the inode_operations -dnl # structure changed. Also, the interface of the simple_getattr() -dnl # function provided by the kernel changed. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GETATTR], [ - ZFS_AC_PATH_KERNEL_IOPS_GETATTR - ZFS_AC_VFSMOUNT_KERNEL_IOPS_GETATTR + AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, + [iops->getattr() takes a vfsmount]) + ],[ + AC_MSG_RESULT(no) + ]) + ]) ]) diff --git a/config/kernel-inode-lock.m4 b/config/kernel-inode-lock.m4 index 8dee01422727..5eb04af78771 100644 --- a/config/kernel-inode-lock.m4 +++ b/config/kernel-inode-lock.m4 @@ -4,20 +4,21 @@ dnl # i_mutex is changed to i_rwsem. 
Instead of directly using dnl # i_mutex/i_rwsem, we should use inode_lock() and inode_lock_shared() dnl # We test inode_lock_shared because inode_lock is introduced earlier. dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_LOCK], [ - AC_MSG_CHECKING([whether inode_lock_shared() exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_LOCK], [ + ZFS_LINUX_TEST_SRC([inode_lock], [ #include ],[ struct inode *inode = NULL; inode_lock_shared(inode); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_LOCK], [ + AC_MSG_CHECKING([whether inode_lock_shared() exists]) + ZFS_LINUX_TEST_RESULT([inode_lock], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_LOCK_SHARED, 1, [yes]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-inode-set-flags.m4 b/config/kernel-inode-set-flags.m4 index e0ad26796dd0..133f666a9517 100644 --- a/config/kernel-inode-set-flags.m4 +++ b/config/kernel-inode-set-flags.m4 @@ -2,14 +2,18 @@ dnl # dnl # 3.15 API change dnl # inode_set_flags introduced to set i_flags dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [ - AC_MSG_CHECKING([whether inode_set_flags() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS], [ + ZFS_LINUX_TEST_SRC([inode_set_flags], [ #include ],[ struct inode inode; inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [ + AC_MSG_CHECKING([whether inode_set_flags() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_flags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_SET_FLAGS, 1, [inode_set_flags() exists]) ],[ diff --git a/config/kernel-inode-set-iversion.m4 b/config/kernel-inode-set-iversion.m4 index 9a7d7890e54e..dd415de324a7 100644 --- a/config/kernel-inode-set-iversion.m4 +++ b/config/kernel-inode-set-iversion.m4 @@ -2,14 +2,18 @@ dnl # dnl # 4.16 API change dnl # inode_set_iversion introduced to set i_version dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_IVERSION], [ - AC_MSG_CHECKING([whether inode_set_iversion() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION], [ + ZFS_LINUX_TEST_SRC([inode_set_iversion], [ #include ],[ struct inode inode; inode_set_iversion(&inode, 1); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_IVERSION], [ + AC_MSG_CHECKING([whether inode_set_iversion() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_iversion], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_SET_IVERSION, 1, [inode_set_iversion() exists]) diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4 index f5818411aa50..57e7f31fdcbb 100644 --- a/config/kernel-inode-times.m4 +++ b/config/kernel-inode-times.m4 @@ -2,11 +2,8 @@ dnl # dnl # 4.18 API change dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ - AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ + ZFS_LINUX_TEST_SRC([inode_times], [ #include ],[ struct inode ip; @@ -14,12 +11,16 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ memset(&ip, 0, sizeof(ip)); ts = ip.i_mtime; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ + AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) + ZFS_LINUX_TEST_RESULT([inode_times], [ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1, [inode->i_*time's are timespec64]) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-insert-inode-locked.m4 b/config/kernel-insert-inode-locked.m4 index da141d180a9e..4990399c3f15 100644 --- a/config/kernel-insert-inode-locked.m4 +++ b/config/kernel-insert-inode-locked.m4 @@ -2,16 +2,21 @@ dnl # dnl # 2.6.28 API change dnl # Added insert_inode_locked() helper function. dnl # -AC_DEFUN([ZFS_AC_KERNEL_INSERT_INODE_LOCKED], - [AC_MSG_CHECKING([whether insert_inode_locked() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED], [ + ZFS_LINUX_TEST_SRC([insert_inode_locked], [ #include ], [ insert_inode_locked(NULL); - ], [insert_inode_locked], [fs/inode.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INSERT_INODE_LOCKED], [ + AC_MSG_CHECKING([whether insert_inode_locked() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([insert_inode_locked], + [insert_inode_locked], [fs/inode.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INSERT_INODE_LOCKED, 1, - [insert_inode_locked() is available]) + [insert_inode_locked() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-invalidate-bdev-args.m4 b/config/kernel-invalidate-bdev-args.m4 index 09c2ebf26e99..55a784dd91ab 100644 --- a/config/kernel-invalidate-bdev-args.m4 +++ b/config/kernel-invalidate-bdev-args.m4 @@ -2,17 +2,21 @@ dnl # dnl # 2.6.22 API change dnl # Unused destroy_dirty_buffers arg removed from prototype. dnl # -AC_DEFUN([ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS], [ - AC_MSG_CHECKING([whether invalidate_bdev() wants 1 arg]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INVALIDATE_BDEV], [ + ZFS_LINUX_TEST_SRC([invalidate_bdev], [ #include ],[ struct block_device *bdev = NULL; invalidate_bdev(bdev); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INVALIDATE_BDEV], [ + AC_MSG_CHECKING([whether invalidate_bdev() wants 1 arg]) + ZFS_LINUX_TEST_RESULT([invalidate_bdev], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_INVALIDATE_BDEV, 1, - [invalidate_bdev() wants 1 arg]) + [invalidate_bdev() wants 1 arg]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-is_owner_or_cap.m4 b/config/kernel-is_owner_or_cap.m4 index da07e58dda35..ab80724091a7 100644 --- a/config/kernel-is_owner_or_cap.m4 +++ b/config/kernel-is_owner_or_cap.m4 @@ -4,33 +4,40 @@ dnl # The is_owner_or_cap() macro was renamed to inode_owner_or_capable(), dnl # This is used for permission checks in the xattr and file attribute call dnl # paths. 
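Two side effects of the conversion are visible in the hunks above. First, checks such as group_info, inode_lock, and inode_times previously wrapped each compile in ad hoc flag juggling, which this patch deletes everywhere it occurs:

	tmp_flags="$EXTRA_KCFLAGS"
	EXTRA_KCFLAGS="-Werror"
	dnl # ... individual ZFS_LINUX_TRY_COMPILE here ...
	EXTRA_KCFLAGS="$tmp_flags"

Presumably the shared test framework now applies -Werror centrally; again, that framework is outside this excerpt, so this is inferred from the deletions alone. Second, note the deliberately inverted branches kept by the inode_times check: its test program assigns i_mtime to a plain struct timespec, so a successful compile means the 4.18 timespec64 change is absent, and the success branch of ZFS_LINUX_TEST_RESULT correctly prints no while the failure branch defines HAVE_INODE_TIMESPEC64_TIMES.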
dnl # -AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [ - AC_MSG_CHECKING([whether inode_owner_or_capable() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [ + ZFS_LINUX_TEST_SRC([inode_owner_or_capable], [ #include ],[ struct inode *ip = NULL; (void) inode_owner_or_capable(ip); + ]) + + + ZFS_LINUX_TEST_SRC([is_owner_or_cap], [ + #include + #include ],[ + struct inode *ip = NULL; + (void) is_owner_or_cap(ip); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [ + AC_MSG_CHECKING([whether inode_owner_or_capable() exists]) + ZFS_LINUX_TEST_RESULT([inode_owner_or_capable], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE, 1, [inode_owner_or_capable() exists]) ],[ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether is_owner_or_cap() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - struct inode *ip = NULL; - (void) is_owner_or_cap(ip); - ],[ + + ZFS_LINUX_TEST_RESULT([is_owner_or_cap], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IS_OWNER_OR_CAP, 1, [is_owner_or_cap() exists]) ],[ - AC_MSG_ERROR(no - Please file a bug report at - https://github.com/zfsonlinux/zfs/issues/new) + ZFS_LINUX_TEST_ERROR([capability]) ]) ]) ]) diff --git a/config/kernel-kmap-atomic-args.m4 b/config/kernel-kmap-atomic-args.m4 index beb1692e721b..d09e93d7ffeb 100644 --- a/config/kernel-kmap-atomic-args.m4 +++ b/config/kernel-kmap-atomic-args.m4 @@ -3,17 +3,21 @@ dnl # 2.6.37 API change dnl # kmap_atomic changed from assigning hard-coded named slot to using dnl # push/pop based dynamical allocation. dnl # -AC_DEFUN([ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS], [ - AC_MSG_CHECKING([whether kmap_atomic wants 1 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS], [ + ZFS_LINUX_TEST_SRC([kmap_atomic], [ #include ],[ struct page page; kmap_atomic(&page); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS], [ + AC_MSG_CHECKING([whether kmap_atomic wants 1 args]) + ZFS_LINUX_TEST_RESULT([kmap_atomic], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_KMAP_ATOMIC, 1, - [kmap_atomic wants 1 args]) + [kmap_atomic wants 1 args]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-kmem-cache.m4 b/config/kernel-kmem-cache.m4 index 21cc53d34933..7576e6cfd850 100644 --- a/config/kernel-kmem-cache.m4 +++ b/config/kernel-kmem-cache.m4 @@ -5,30 +5,36 @@ dnl # private allocation flags which are applied when allocating a new slab dnl # in kmem_getpages(). Unfortunately there is no public API for setting dnl # non-default flags. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS], [ - AC_MSG_CHECKING([whether struct kmem_cache has allocflags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_ALLOCFLAGS], [ + ZFS_LINUX_TEST_SRC([kmem_cache_allocflags], [ #include ],[ struct kmem_cache cachep __attribute__ ((unused)); cachep.allocflags = GFP_KERNEL; + ]) + + ZFS_LINUX_TEST_SRC([kmem_cache_gfpflags], [ + #include ],[ + struct kmem_cache cachep __attribute__ ((unused)); + cachep.gfpflags = GFP_KERNEL; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS], [ + AC_MSG_CHECKING([whether struct kmem_cache has allocflags]) + ZFS_LINUX_TEST_RESULT([kmem_cache_allocflags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KMEM_CACHE_ALLOCFLAGS, 1, - [struct kmem_cache has allocflags]) + [struct kmem_cache has allocflags]) ],[ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether struct kmem_cache has gfpflags]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct kmem_cache cachep __attribute__ ((unused)); - cachep.gfpflags = GFP_KERNEL; - ],[ + ZFS_LINUX_TEST_RESULT([kmem_cache_gfpflags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KMEM_CACHE_GFPFLAGS, 1, - [struct kmem_cache has gfpflags]) + [struct kmem_cache has gfpflags]) ],[ AC_MSG_RESULT(no) ]) @@ -40,16 +46,10 @@ dnl # grsecurity API change, dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by dnl # kmem_cache_create_usercopy(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ - AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY], [ + ZFS_LINUX_TEST_SRC([kmem_cache_create_usercopy], [ #include - static void ctor(void *foo) - { - // fake ctor - } + static void ctor(void *foo) { /* fake ctor */ } ],[ struct kmem_cache *skc_linux_cache; const char *name = "test"; @@ -60,13 +60,27 @@ AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ size_t usersize = size - useroffset; skc_linux_cache = kmem_cache_create_usercopy( - name, size, align, flags, useroffset, usersize, ctor); - ],[ + name, size, align, flags, useroffset, usersize, ctor); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ + AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists]) + ZFS_LINUX_TEST_RESULT([kmem_cache_create_usercopy], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KMEM_CACHE_CREATE_USERCOPY, 1, - [kmem_cache_create_usercopy() exists]) + [kmem_cache_create_usercopy() exists]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE], [ + ZFS_AC_KERNEL_SRC_KMEM_CACHE_ALLOCFLAGS + ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY +]) + +AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE], [ + ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS + ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY ]) diff --git a/config/kernel-kstrtoul.m4 b/config/kernel-kstrtoul.m4 new file mode 100644 index 000000000000..ef3c9843cce5 --- /dev/null +++ b/config/kernel-kstrtoul.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # 2.6.39 API change +dnl # +dnl # If kstrtoul() doesn't exist, fallback to use strict_strtoul() which has +dnl # existed since 2.6.25. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_KSTRTOUL], [ + ZFS_LINUX_TEST_SRC([kstrtoul], [ + #include + ],[ + int ret __attribute__ ((unused)) = kstrtoul(NULL, 10, NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KSTRTOUL], [ + AC_MSG_CHECKING([whether kstrtoul() exists]) + ZFS_LINUX_TEST_RESULT([kstrtoul], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KSTRTOUL, 1, [kstrtoul() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-ktime_get_coarse_real_ts64.m4 b/config/kernel-ktime_get_coarse_real_ts64.m4 index d6be8c4185ac..28492bf04bcb 100644 --- a/config/kernel-ktime_get_coarse_real_ts64.m4 +++ b/config/kernel-ktime_get_coarse_real_ts64.m4 @@ -2,16 +2,21 @@ dnl # dnl # 4.18: ktime_get_coarse_real_ts64() added. Use it in place of dnl # current_kernel_time64(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], - [AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64], [ + ZFS_LINUX_TEST_SRC([ktime_get_coarse_real_ts64], [ #include ], [ struct timespec64 ts; ktime_get_coarse_real_ts64(&ts); - ], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], [ + AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) + ZFS_LINUX_TEST_RESULT([ktime_get_coarse_real_ts64], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, [ktime_get_coarse_real_ts64() exists]) + AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, + [ktime_get_coarse_real_ts64() exists]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-kuid-helpers.m4 b/config/kernel-kuid-helpers.m4 index 60713b9d3132..4bc4e039d8c3 100644 --- a/config/kernel-kuid-helpers.m4 +++ b/config/kernel-kuid-helpers.m4 @@ -5,14 +5,18 @@ dnl # became necessary to go through one more level of indirection dnl # when dealing with uid/gid - namely the kuid type. dnl # dnl # -AC_DEFUN([ZFS_AC_KERNEL_KUID_HELPERS], [ - AC_MSG_CHECKING([whether i_(uid|gid)_(read|write) exist]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KUID_HELPERS], [ + ZFS_LINUX_TEST_SRC([i_uid_read], [ #include ],[ struct inode *ip = NULL; (void) i_uid_read(ip); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KUID_HELPERS], [ + AC_MSG_CHECKING([whether i_(uid|gid)_(read|write) exist]) + ZFS_LINUX_TEST_RESULT([i_uid_read], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KUID_HELPERS, 1, [i_(uid|gid)_(read|write) exist]) diff --git a/config/kernel-kuidgid.m4 b/config/kernel-kuidgid.m4 index 82685d26369e..15bf98154e82 100644 --- a/config/kernel-kuidgid.m4 +++ b/config/kernel-kuidgid.m4 @@ -3,20 +3,26 @@ dnl # User namespaces, use kuid_t in place of uid_t dnl # where available. 
Not strictly a user namespaces thing dnl # but it should prevent surprises dnl # -AC_DEFUN([ZFS_AC_KERNEL_KUIDGID_T], [ - AC_MSG_CHECKING([whether kuid_t/kgid_t is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KUIDGID_T], [ + ZFS_LINUX_TEST_SRC([kuidgid_t_init], [ #include ], [ kuid_t userid __attribute__ ((unused)) = KUIDT_INIT(0); kgid_t groupid __attribute__ ((unused)) = KGIDT_INIT(0); - ],[ - ZFS_LINUX_TRY_COMPILE([ - #include - ], [ - kuid_t userid __attribute__ ((unused)) = 0; - kgid_t groupid __attribute__ ((unused)) = 0; - ],[ + ]) + + ZFS_LINUX_TEST_SRC([kuidgid_t], [ + #include + ], [ + kuid_t userid __attribute__ ((unused)) = 0; + kgid_t groupid __attribute__ ((unused)) = 0; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KUIDGID_T], [ + AC_MSG_CHECKING([whether kuid_t/kgid_t is available]) + ZFS_LINUX_TEST_RESULT([kuidgid_t_init], [ + ZFS_LINUX_TEST_RESULT([kuidgid_t], [ AC_MSG_RESULT(yes; optional) ],[ AC_MSG_RESULT(yes; mandatory) diff --git a/config/kernel-lookup-bdev.m4 b/config/kernel-lookup-bdev.m4 index abbf55d9bb39..72b4993e1483 100644 --- a/config/kernel-lookup-bdev.m4 +++ b/config/kernel-lookup-bdev.m4 @@ -2,23 +2,33 @@ dnl # dnl # 2.6.27, lookup_bdev() was exported. dnl # 4.4.0-6.21 - x.y on Ubuntu, lookup_bdev() takes 2 arguments. dnl # -AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], - [AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_BDEV], [ + ZFS_LINUX_TEST_SRC([lookup_bdev_1arg], [ #include ], [ lookup_bdev(NULL); - ], [lookup_bdev], [fs/block_dev.c], [ + ]) + + ZFS_LINUX_TEST_SRC([lookup_bdev_2args], [ + #include + ], [ + lookup_bdev(NULL, FMODE_READ); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], [ + AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg]) + ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_1arg], + [lookup_bdev], [fs/block_dev.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1, [lookup_bdev() wants 1 arg]) + AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1, + [lookup_bdev() wants 1 arg]) ], [ AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether lookup_bdev() wants 2 args]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include - ], [ - lookup_bdev(NULL, FMODE_READ); - ], [lookup_bdev], [fs/block_dev.c], [ + ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_2args], + [lookup_bdev], [fs/block_dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_LOOKUP_BDEV, 1, [lookup_bdev() wants 2 args]) @@ -26,4 +36,4 @@ AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], AC_MSG_RESULT(no) ]) ]) -]) \ No newline at end of file +]) diff --git a/config/kernel-lookup-nameidata.m4 b/config/kernel-lookup-nameidata.m4 index 5453be5e8e38..865b8aff8513 100644 --- a/config/kernel-lookup-nameidata.m4 +++ b/config/kernel-lookup-nameidata.m4 @@ -1,9 +1,8 @@ dnl # dnl # 3.6 API change dnl # -AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [ - AC_MSG_CHECKING([whether iops->lookup() passes nameidata]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_NAMEIDATA], [ + ZFS_LINUX_TEST_SRC([lookup_nameidata], [ #include #include @@ -15,11 +14,15 @@ AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [ __attribute__ ((unused)) = { .lookup = inode_lookup, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [ + AC_MSG_CHECKING([whether iops->lookup() passes nameidata]) + ZFS_LINUX_TEST_RESULT([lookup_nameidata], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_LOOKUP_NAMEIDATA, 1, - [iops->lookup() passes nameidata]) + [iops->lookup() passes nameidata]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-lseek-execute.m4 b/config/kernel-lseek-execute.m4 
index 8c4032b92c6b..652f611f8da4 100644 --- a/config/kernel-lseek-execute.m4 +++ b/config/kernel-lseek-execute.m4 @@ -2,9 +2,8 @@ dnl # dnl # 3.11 API change dnl # lseek_execute helper exported dnl # -AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], - [AC_MSG_CHECKING([whether lseek_execute() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE], [ + ZFS_LINUX_TEST_SRC([lseek_execute], [ #include ], [ struct file *fp __attribute__ ((unused)) = NULL; @@ -13,10 +12,15 @@ AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], loff_t maxsize __attribute__ ((unused)) = 0; lseek_execute(fp, ip, offset, maxsize); - ], [lseek_exclusive], [fs/read_write.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], [ + AC_MSG_CHECKING([whether lseek_execute() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([lseek_execute], + [lseek_exclusive], [fs/read_write.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_LSEEK_EXECUTE, 1, - [lseek_execute() is available]) + AC_DEFINE(HAVE_LSEEK_EXECUTE, 1, [lseek_execute() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 new file mode 100644 index 000000000000..86339aa0450d --- /dev/null +++ b/config/kernel-make-request-fn.m4 @@ -0,0 +1,77 @@ +dnl # +dnl # Check for make_request_fn interface. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ + ZFS_LINUX_TEST_SRC([make_request_fn_int], [ + #include + int make_request(struct request_queue *q, + struct bio *bio) { return (0); } + ],[ + blk_queue_make_request(NULL, &make_request); + ]) + + ZFS_LINUX_TEST_SRC([make_request_fn_void], [ + #include + void make_request(struct request_queue *q, + struct bio *bio) { return; } + ],[ + blk_queue_make_request(NULL, &make_request); + ]) + + ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [ + #include + blk_qc_t make_request(struct request_queue *q, + struct bio *bio) { return (BLK_QC_T_NONE); } + ],[ + blk_queue_make_request(NULL, &make_request); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ + dnl # + dnl # Legacy API + dnl # make_request_fn returns int. + dnl # + AC_MSG_CHECKING([whether make_request_fn() returns int]) + ZFS_LINUX_TEST_RESULT([make_request_fn_int], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, int, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1, + [Noting that make_request_fn() returns int]) + ],[ + AC_MSG_RESULT(no) + + dnl # + dnl # Linux 3.2 API Change + dnl # make_request_fn returns void. + dnl # + AC_MSG_CHECKING([whether make_request_fn() returns void]) + ZFS_LINUX_TEST_RESULT([make_request_fn_void], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, void, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1, + [Noting that make_request_fn() returns void]) + ],[ + AC_MSG_RESULT(no) + + dnl # + dnl # Linux 4.4 API Change + dnl # make_request_fn returns blk_qc_t. + dnl # + AC_MSG_CHECKING( + [whether make_request_fn() returns blk_qc_t]) + ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, + [make_request_fn() return type]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, + [Noting that make_request_fn() ] + [returns blk_qc_t]) + ],[ + ZFS_LINUX_TEST_ERROR([make_request_fn]) + ]) + ]) + ]) +]) diff --git a/config/kernel-misc-minor.m4 b/config/kernel-misc-minor.m4 index a020d2ebca01..20fe2cd2f3cd 100644 --- a/config/kernel-misc-minor.m4 +++ b/config/kernel-misc-minor.m4 @@ -6,7 +6,7 @@ dnl # number. 
Start with a large known available unreserved minor and work dnl # our way down to lower value if a collision is detected. dnl # AC_DEFUN([ZFS_AC_KERNEL_MISC_MINOR], [ - AC_MSG_CHECKING([for available /dev/zfs minor]) + AC_MSG_CHECKING([whether /dev/zfs minor is available]) for i in $(seq 249 -1 200); do if ! grep -q "^#define\s\+.*_MINOR\s\+.*$i" \ diff --git a/config/kernel-mk-request-fn.m4 b/config/kernel-mk-request-fn.m4 deleted file mode 100644 index 57eebe23de5b..000000000000 --- a/config/kernel-mk-request-fn.m4 +++ /dev/null @@ -1,65 +0,0 @@ -dnl # -dnl # Linux 3.2 API Change -dnl # make_request_fn returns void instead of int. -dnl # -dnl # Linux 4.4 API Change -dnl # make_request_fn returns blk_qc_t. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ - AC_MSG_CHECKING([whether make_request_fn() returns int]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int make_request(struct request_queue *q, struct bio *bio) - { - return (0); - } - ],[ - blk_queue_make_request(NULL, &make_request); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, int, - [make_request_fn() returns int]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1, - [Noting that make_request_fn() returns int]) - ],[ - AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether make_request_fn() returns void]) - ZFS_LINUX_TRY_COMPILE([ - #include - - void make_request(struct request_queue *q, struct bio *bio) - { - return; - } - ],[ - blk_queue_make_request(NULL, &make_request); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, void, - [make_request_fn() returns void]) - ],[ - AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether make_request_fn() returns blk_qc_t]) - ZFS_LINUX_TRY_COMPILE([ - #include - - blk_qc_t make_request(struct request_queue *q, struct bio *bio) - { - return (BLK_QC_T_NONE); - } - ],[ - blk_queue_make_request(NULL, &make_request); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, - [make_request_fn() returns blk_qc_t]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, - [Noting that make_request_fn() returns blk_qc_t]) - ],[ - AC_MSG_ERROR(no - Please file a bug report at - https://github.com/zfsonlinux/zfs/issues/new) - ]) - ]) - ]) -]) diff --git a/config/kernel-mkdir-umode-t.m4 b/config/kernel-mkdir-umode-t.m4 index ebc21be9ec55..f4dde29a37ee 100644 --- a/config/kernel-mkdir-umode-t.m4 +++ b/config/kernel-mkdir-umode-t.m4 @@ -4,11 +4,10 @@ dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a dnl # umode_t type rather than an int. The expectation is that any backport dnl # would also change all three prototypes. However, if it turns out that dnl # some distribution doesn't backport the whole thing this could be -dnl # broken apart in to three separate checks. +dnl # broken apart into three separate checks. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ - AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T], [ + ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [ #include int mkdir(struct inode *inode, struct dentry *dentry, @@ -18,8 +17,12 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ iops __attribute__ ((unused)) = { .mkdir = mkdir, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [ + AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t]) + ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_MKDIR_UMODE_T, 1, [iops->create()/mkdir()/mknod() take umode_t]) diff --git a/config/kernel-mod-param.m4 b/config/kernel-mod-param.m4 index b72be684a44e..e00f19d61e7d 100644 --- a/config/kernel-mod-param.m4 +++ b/config/kernel-mod-param.m4 @@ -2,9 +2,8 @@ dnl # dnl # Grsecurity kernel API change dnl # constified parameters of module_param_call() methods dnl # -AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [ - AC_MSG_CHECKING([whether module_param_call() is hardened]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST], [ + ZFS_LINUX_TEST_SRC([module_param_call], [ #include #include @@ -19,8 +18,12 @@ AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [ } module_param_call(p, param_set, param_get, NULL, 0644); - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [ + AC_MSG_CHECKING([whether module_param_call() is hardened]) + ZFS_LINUX_TEST_RESULT([module_param_call], [ AC_MSG_RESULT(yes) AC_DEFINE(MODULE_PARAM_CALL_CONST, 1, [hardened module_param_call]) diff --git a/config/kernel-objtool.m4 b/config/kernel-objtool.m4 index 467329b2541e..bf60e7869213 100644 --- a/config/kernel-objtool.m4 +++ b/config/kernel-objtool.m4 @@ -1,41 +1,44 @@ dnl # -dnl # 4.6 API for compile-time stack validation +dnl # Check for objtool support. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_OBJTOOL], [ - AC_MSG_CHECKING([for compile-time stack validation (objtool)]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_OBJTOOL], [ + + dnl # 4.6 API for compile-time stack validation + ZFS_LINUX_TEST_SRC([objtool], [ #undef __ASSEMBLY__ #include ],[ #if !defined(FRAME_BEGIN) CTASSERT(1); #endif - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_OBJTOOL, 1, - [kernel does stack verification]) - - ZFS_AC_KERNEL_STACK_FRAME_NON_STANDARD - ],[ - AC_MSG_RESULT(no) ]) -]) -dnl # -dnl # 4.6 API added STACK_FRAME_NON_STANDARD macro -dnl # -AC_DEFUN([ZFS_AC_KERNEL_STACK_FRAME_NON_STANDARD], [ - AC_MSG_CHECKING([whether STACK_FRAME_NON_STANDARD is defined]) - ZFS_LINUX_TRY_COMPILE([ + dnl # 4.6 API added STACK_FRAME_NON_STANDARD macro + ZFS_LINUX_TEST_SRC([stack_frame_non_standard], [ #include ],[ #if !defined(STACK_FRAME_NON_STANDARD) CTASSERT(1); #endif - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_OBJTOOL], [ + AC_MSG_CHECKING( + [whether compile-time stack validation (objtool) is available]) + ZFS_LINUX_TEST_RESULT([objtool], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_STACK_FRAME_NON_STANDARD, 1, - [STACK_FRAME_NON_STANDARD is defined]) + AC_DEFINE(HAVE_KERNEL_OBJTOOL, 1, + [kernel does stack verification]) + + AC_MSG_CHECKING([whether STACK_FRAME_NON_STANDARD is defined]) + ZFS_LINUX_TEST_RESULT([stack_frame_non_standard], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_STACK_FRAME_NON_STANDARD, 1, + [STACK_FRAME_NON_STANDARD is defined]) + ],[ + AC_MSG_RESULT(no) + ]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-open-bdev-exclusive.m4 b/config/kernel-open-bdev-exclusive.m4 index 0661315a610f..2e46b8876a40 100644 --- a/config/kernel-open-bdev-exclusive.m4 +++ b/config/kernel-open-bdev-exclusive.m4 @@ -2,16 +2,21 @@ dnl # dnl # 2.6.28 API change dnl # open/close_bdev_excl() renamed to open/close_bdev_exclusive() dnl # -AC_DEFUN([ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE], - [AC_MSG_CHECKING([whether open_bdev_exclusive() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_OPEN_EXCLUSIVE], [ + ZFS_LINUX_TEST_SRC([open_bdev_exclusive], [ #include ], [ open_bdev_exclusive(NULL, 0, NULL); - ], [open_bdev_exclusive], [fs/block_dev.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BDEV_OPEN_EXCLUSIVE], [ + AC_MSG_CHECKING([whether open_bdev_exclusive() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([open_bdev_exclusive], + [open_bdev_exclusive], [fs/block_dev.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_OPEN_BDEV_EXCLUSIVE, 1, - [open_bdev_exclusive() is available]) + [open_bdev_exclusive() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-pde-data.m4 b/config/kernel-pde-data.m4 index 8aa4c2204e8e..928c5ef0d885 100644 --- a/config/kernel-pde-data.m4 +++ b/config/kernel-pde-data.m4 @@ -2,15 +2,19 @@ dnl # dnl # 3.10 API change, dnl # PDE is replaced by PDE_DATA dnl # -AC_DEFUN([ZFS_AC_KERNEL_PDE_DATA], [ - AC_MSG_CHECKING([whether PDE_DATA() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_PDE_DATA], [ + ZFS_LINUX_TEST_SRC([pde_data], [ #include ], [ PDE_DATA(NULL); - ], [PDE_DATA], [], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PDE_DATA], [ + AC_MSG_CHECKING([whether PDE_DATA() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([pde_data], [PDE_DATA], [], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PDE_DATA, 1, [yes]) + AC_DEFINE(HAVE_PDE_DATA, 1, [PDE_DATA is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-put-link.m4 b/config/kernel-put-link.m4 index a0bb36ef2772..f03df9e99bff 100644 --- 
a/config/kernel-put-link.m4
+++ b/config/kernel-put-link.m4
@@ -1,17 +1,35 @@
 dnl #
 dnl # Supported symlink APIs
 dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [
+	ZFS_LINUX_TEST_SRC([put_link_cookie], [
+		#include <linux/fs.h>
+		void put_link(struct inode *ip, void *cookie)
+		    { return; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.put_link = put_link,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([put_link_nameidata], [
+		#include <linux/fs.h>
+		void put_link(struct dentry *de, struct
+		    nameidata *nd, void *ptr) { return; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.put_link = put_link,
+		};
+	],[])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_PUT_LINK], [
 	dnl #
 	dnl # 4.5 API change
 	dnl # get_link() uses delayed done, there is no put_link() interface.
+	dnl # This check initially uses the inode_operations_get_link result
 	dnl #
-	ZFS_LINUX_TRY_COMPILE([
-		#if !defined(HAVE_GET_LINK_DELAYED)
-		#error "Expecting get_link() delayed done"
-		#endif
-	],[
-	],[
+	ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [
 		AC_DEFINE(HAVE_PUT_LINK_DELAYED, 1, [iops->put_link() delayed])
 	],[
 		dnl #
@@ -19,41 +37,24 @@ AC_DEFUN([ZFS_AC_KERNEL_PUT_LINK], [
 		dnl # This kernel retired the nameidata structure.
 		dnl #
 		AC_MSG_CHECKING([whether iops->put_link() passes cookie])
-		ZFS_LINUX_TRY_COMPILE([
-			#include <linux/fs.h>
-			void put_link(struct inode *ip, void *cookie)
-			    { return; }
-			static struct inode_operations
-			    iops __attribute__ ((unused)) = {
-				.put_link = put_link,
-			};
-		],[
-		],[
+		ZFS_LINUX_TEST_RESULT([put_link_cookie], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_PUT_LINK_COOKIE, 1,
 			    [iops->put_link() cookie])
 		],[
+			AC_MSG_RESULT(no)
+
 			dnl #
 			dnl # 2.6.32 API
 			dnl #
-			AC_MSG_RESULT(no)
 			AC_MSG_CHECKING(
 			    [whether iops->put_link() passes nameidata])
-			ZFS_LINUX_TRY_COMPILE([
-				#include <linux/fs.h>
-				void put_link(struct dentry *de, struct
-				    nameidata *nd, void *ptr) { return; }
-				static struct inode_operations
-				    iops __attribute__ ((unused)) = {
-					.put_link = put_link,
-				};
-			],[
-			],[
+			ZFS_LINUX_TEST_RESULT([put_link_nameidata], [
 				AC_MSG_RESULT(yes)
 				AC_DEFINE(HAVE_PUT_LINK_NAMEIDATA, 1,
 				    [iops->put_link() nameidata])
 			],[
-				AC_MSG_ERROR(no; please file a bug report)
+				ZFS_LINUX_TEST_ERROR([put_link])
 			])
 		])
 	])
 ])
diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4
index 9f894fb4db1a..f707391539d8 100644
--- a/config/kernel-rename.m4
+++ b/config/kernel-rename.m4
@@ -3,9 +3,8 @@ dnl # 4.9 API change,
 dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
 dnl # flags.
dnl # -AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ - AC_MSG_CHECKING([whether iops->rename() wants flags]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [ + ZFS_LINUX_TEST_SRC([inode_operations_rename], [ #include int rename_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, @@ -15,10 +14,15 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ iops __attribute__ ((unused)) = { .rename = rename_fn, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, [iops->rename() wants flags]) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-rw.m4 b/config/kernel-rw.m4 index 1c8a265e0ae0..85b47d5c6fc2 100644 --- a/config/kernel-rw.m4 +++ b/config/kernel-rw.m4 @@ -3,11 +3,8 @@ dnl # 4.14 API change dnl # kernel_write() which was introduced in 3.9 was updated to take dnl # the offset as a pointer which is needed by vn_rdwr(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ - AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITE], [ + ZFS_LINUX_TEST_SRC([kernel_write], [ #include ],[ struct file *file = NULL; @@ -17,14 +14,18 @@ AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ ssize_t ret; ret = kernel_write(file, buf, count, pos); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ + AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer]) + ZFS_LINUX_TEST_RESULT([kernel_write], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1, [kernel_write() take loff_t pointer]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # @@ -32,11 +33,8 @@ dnl # 4.14 API change dnl # kernel_read() which has existed for forever was updated to take dnl # the offset as a pointer which is needed by vn_rdwr(). dnl # -AC_DEFUN([ZFS_AC_KERNEL_READ], [ - AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_READ], [ + ZFS_LINUX_TEST_SRC([kernel_read], [ #include ],[ struct file *file = NULL; @@ -46,12 +44,26 @@ AC_DEFUN([ZFS_AC_KERNEL_READ], [ ssize_t ret; ret = kernel_read(file, buf, count, pos); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_READ], [ + AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer]) + ZFS_LINUX_TEST_RESULT([kernel_read], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_READ_PPOS, 1, [kernel_read() take loff_t pointer]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_RW], [ + ZFS_AC_KERNEL_SRC_WRITE + ZFS_AC_KERNEL_SRC_READ +]) + +AC_DEFUN([ZFS_AC_KERNEL_RW], [ + ZFS_AC_KERNEL_WRITE + ZFS_AC_KERNEL_READ ]) diff --git a/config/kernel-rwsem.m4 b/config/kernel-rwsem.m4 index 532c22718189..67c5cf908a32 100644 --- a/config/kernel-rwsem.m4 +++ b/config/kernel-rwsem.m4 @@ -4,25 +4,26 @@ dnl # dnl # The rw_semaphore.wait_lock member was changed from spinlock_t to dnl # raw_spinlock_t at commit ddb6c9b58a19edcfac93ac670b066c836ff729f1. 
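Files containing several related checks also gain per-file aggregate macros, like ZFS_AC_KERNEL_SRC_RW / ZFS_AC_KERNEL_RW above and their rwsem equivalents below, so a caller deals with one registration point and one result point per file. A hypothetical top-level sequence (the real caller is not shown in this excerpt):

	dnl # Register all test programs first so they can be
	dnl # compiled in a single batch ...
	ZFS_AC_KERNEL_SRC_RW
	ZFS_AC_KERNEL_SRC_RWSEM

	dnl # ... then read back the cached compile results.
	ZFS_AC_KERNEL_RW
	ZFS_AC_KERNEL_RWSEM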
dnl # -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW], [ - AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW], [ + ZFS_LINUX_TEST_SRC([rwsem_spinlock_is_raw], [ #include ],[ struct rw_semaphore dummy_semaphore __attribute__ ((unused)); raw_spinlock_t dummy_lock __attribute__ ((unused)) = __RAW_SPIN_LOCK_INITIALIZER(dummy_lock); dummy_semaphore.wait_lock = dummy_lock; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW], [ + AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw]) + ZFS_LINUX_TEST_RESULT([rwsem_spinlock_is_raw], [ AC_MSG_RESULT(yes) AC_DEFINE(RWSEM_SPINLOCK_IS_RAW, 1, - [struct rw_semaphore member wait_lock is raw_spinlock_t]) + [struct rw_semaphore member wait_lock is raw_spinlock_t]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # @@ -30,23 +31,24 @@ dnl # 3.16 API Change dnl # dnl # rwsem-spinlock "->activity" changed to "->count" dnl # -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ACTIVITY], [ - AC_MSG_CHECKING([whether struct rw_semaphore has member activity]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY], [ + ZFS_LINUX_TEST_SRC([rwsem_activity], [ #include ],[ struct rw_semaphore dummy_semaphore __attribute__ ((unused)); dummy_semaphore.activity = 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ACTIVITY], [ + AC_MSG_CHECKING([whether struct rw_semaphore has member activity]) + ZFS_LINUX_TEST_RESULT([rwsem_activity], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1, - [struct rw_semaphore has member activity]) + [struct rw_semaphore has member activity]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # @@ -54,22 +56,35 @@ dnl # 4.8 API Change dnl # dnl # rwsem "->count" changed to atomic_long_t type dnl # -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [ - AC_MSG_CHECKING( - [whether struct rw_semaphore has atomic_long_t member count]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT], [ + ZFS_LINUX_TEST_SRC([rwsem_atomic_long_count], [ #include ],[ DECLARE_RWSEM(dummy_semaphore); (void) atomic_long_read(&dummy_semaphore.count); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [ + AC_MSG_CHECKING( + [whether struct rw_semaphore has atomic_long_t member count]) + ZFS_LINUX_TEST_RESULT([rwsem_atomic_long_count], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_RWSEM_ATOMIC_LONG_COUNT, 1, - [struct rw_semaphore has atomic_long_t member count]) + [struct rw_semaphore has atomic_long_t member count]) ],[ AC_MSG_RESULT(no) ]) - EXTRA_KCFLAGS="$tmp_flags" +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM], [ + ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW + ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY + ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT +]) + +AC_DEFUN([ZFS_AC_KERNEL_RWSEM], [ + ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW + ZFS_AC_KERNEL_RWSEM_ACTIVITY + ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT ]) diff --git a/config/kernel-sched.m4 b/config/kernel-sched.m4 index 640b008aab8d..4a7db970aef4 100644 --- a/config/kernel-sched.m4 +++ b/config/kernel-sched.m4 @@ -2,14 +2,18 @@ dnl # dnl # 3.9 API change, dnl # Moved things from linux/sched.h to linux/sched/rt.h dnl # -AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], - [AC_MSG_CHECKING([whether header linux/sched/rt.h exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED_RT_HEADER], [ 
+ ZFS_LINUX_TEST_SRC([sched_rt_header], [ #include #include ],[ return 0; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], [ + AC_MSG_CHECKING([whether header linux/sched/rt.h exists]) + ZFS_LINUX_TEST_RESULT([sched_rt_header], [ AC_DEFINE(HAVE_SCHED_RT_HEADER, 1, [linux/sched/rt.h exists]) AC_MSG_RESULT(yes) ],[ @@ -21,36 +25,59 @@ dnl # dnl # 4.11 API change, dnl # Moved things from linux/sched.h to linux/sched/signal.h dnl # -AC_DEFUN([ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER], - [AC_MSG_CHECKING([whether header linux/sched/signal.h exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER], [ + ZFS_LINUX_TEST_SRC([sched_signal_header], [ #include #include ],[ return 0; - ],[ - AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, [linux/sched/signal.h exists]) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER], [ + AC_MSG_CHECKING([whether header linux/sched/signal.h exists]) + ZFS_LINUX_TEST_RESULT([sched_signal_header], [ + AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, + [linux/sched/signal.h exists]) AC_MSG_RESULT(yes) ],[ AC_MSG_RESULT(no) ]) ]) + dnl # dnl # 3.19 API change dnl # The io_schedule_timeout() function is present in all 2.6.32 kernels dnl # but it was not exported until Linux 3.19. The RHEL 7.x kernels which dnl # are based on a 3.10 kernel do export this symbol. dnl # -AC_DEFUN([ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT], [ - AC_MSG_CHECKING([whether io_schedule_timeout() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT], [ + ZFS_LINUX_TEST_SRC([io_schedule_timeout], [ #include ], [ (void) io_schedule_timeout(1); - ], [io_schedule_timeout], [], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT], [ + AC_MSG_CHECKING([whether io_schedule_timeout() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([io_schedule_timeout], + [io_schedule_timeout], [], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IO_SCHEDULE_TIMEOUT, 1, [yes]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED], [ + ZFS_AC_KERNEL_SRC_SCHED_RT_HEADER + ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER + ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT +]) + +AC_DEFUN([ZFS_AC_KERNEL_SCHED], [ + ZFS_AC_KERNEL_SCHED_RT_HEADER + ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER + ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT +]) diff --git a/config/kernel-security-inode-init.m4 b/config/kernel-security-inode-init.m4 index a62176d42b54..0dea7e3925be 100644 --- a/config/kernel-security-inode-init.m4 +++ b/config/kernel-security-inode-init.m4 @@ -5,9 +5,8 @@ dnl # qstr argument which must be passed in from the dentry if available. dnl # Passing a NULL is safe when no qstr is available the relevant dnl # security checks will just be skipped. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY], [ - AC_MSG_CHECKING([whether security_inode_init_security wants 6 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_6ARGS], [ + ZFS_LINUX_TEST_SRC([security_inode_init_security_6args], [ #include ],[ struct inode *ip __attribute__ ((unused)) = NULL; @@ -18,10 +17,15 @@ AC_DEFUN([ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY], [ size_t len __attribute__ ((unused)) = 0; security_inode_init_security(ip, dip, str, &name, &value, &len); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_6ARGS], [ + AC_MSG_CHECKING([whether security_inode_init_security wants 6 args]) + ZFS_LINUX_TEST_RESULT([security_inode_init_security_6args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY, 1, - [security_inode_init_security wants 6 args]) + [security_inode_init_security wants 6 args]) ],[ AC_MSG_RESULT(no) ]) @@ -34,9 +38,8 @@ dnl # a filesystem specific callback to write security extended attributes. dnl # This was done to support the initialization of multiple LSM xattrs dnl # and the EVM xattr. dnl # -AC_DEFUN([ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY], [ - AC_MSG_CHECKING([whether security_inode_init_security wants callback]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_CALLBACK], [ + ZFS_LINUX_TEST_SRC([security_inode_init_security], [ #include ],[ struct inode *ip __attribute__ ((unused)) = NULL; @@ -45,11 +48,26 @@ AC_DEFUN([ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY], [ initxattrs func __attribute__ ((unused)) = NULL; security_inode_init_security(ip, dip, str, func, NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_CALLBACK], [ + AC_MSG_CHECKING([whether security_inode_init_security wants callback]) + ZFS_LINUX_TEST_RESULT([security_inode_init_security], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY, 1, - [security_inode_init_security wants callback]) + [security_inode_init_security wants callback]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE], [ + ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_6ARGS + ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_CALLBACK +]) + +AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE], [ + ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_6ARGS + ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_CALLBACK +]) diff --git a/config/kernel-set-fs-pwd.m4 b/config/kernel-set-fs-pwd.m4 deleted file mode 100644 index d5565b42cb57..000000000000 --- a/config/kernel-set-fs-pwd.m4 +++ /dev/null @@ -1,39 +0,0 @@ -dnl # -dnl # 3.9 API change -dnl # set_fs_pwd takes const struct path * -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SET_FS_PWD_WITH_CONST], - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - [AC_MSG_CHECKING([whether set_fs_pwd() requires const struct path *]) - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - void (*const set_fs_pwd_func) - (struct fs_struct *, const struct path *) - = set_fs_pwd; - ],[ - return 0; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_FS_PWD_WITH_CONST, 1, - [set_fs_pwd() needs const path *]) - ],[ - ZFS_LINUX_TRY_COMPILE([ - #include - #include - #include - void (*const set_fs_pwd_func) - (struct fs_struct *, struct path *) - = set_fs_pwd; - ],[ - return 0; - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_ERROR(unknown) - ]) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-set-nlink.m4 b/config/kernel-set-nlink.m4 index f7ffc0d3a5e9..63a5a8c0dace 100644 --- 
a/config/kernel-set-nlink.m4 +++ b/config/kernel-set-nlink.m4 @@ -2,18 +2,21 @@ dnl # dnl # Linux v3.2-rc1 API change dnl # SHA: bfe8684869601dacfcb2cd69ef8cfd9045f62170 dnl # -AC_DEFUN([ZFS_AC_KERNEL_SET_NLINK], [ - AC_MSG_CHECKING([whether set_nlink() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_NLINK], [ + ZFS_LINUX_TEST_SRC([set_nlink], [ #include ],[ struct inode node; unsigned int link = 0; (void) set_nlink(&node, link); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SET_NLINK], [ + AC_MSG_CHECKING([whether set_nlink() is available]) + ZFS_LINUX_TEST_RESULT([set_nlink], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_NLINK, 1, - [set_nlink() is available]) + AC_DEFINE(HAVE_SET_NLINK, 1, [set_nlink() is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4 index 32f7deb77abe..45408c45c69b 100644 --- a/config/kernel-setattr-prepare.m4 +++ b/config/kernel-setattr-prepare.m4 @@ -3,17 +3,21 @@ dnl # 4.9 API change dnl # The inode_change_ok() function has been renamed setattr_prepare() dnl # and updated to take a dentry rather than an inode. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], - [AC_MSG_CHECKING([whether setattr_prepare() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [ + ZFS_LINUX_TEST_SRC([setattr_prepare], [ #include ], [ struct dentry *dentry = NULL; struct iattr *attr = NULL; - int error; + int error __attribute__ ((unused)) = + setattr_prepare(dentry, attr); + ]) +]) - error = setattr_prepare(dentry, attr); - ], [setattr_prepare], [fs/attr.c], [ +AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [ + AC_MSG_CHECKING([whether setattr_prepare() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare], + [setattr_prepare], [fs/attr.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SETATTR_PREPARE, 1, [setattr_prepare() is available]) diff --git a/config/kernel-sget-args.m4 b/config/kernel-sget-args.m4 index 9d1745925f3c..13581399ecc6 100644 --- a/config/kernel-sget-args.m4 +++ b/config/kernel-sget-args.m4 @@ -2,9 +2,8 @@ dnl # dnl # 3.6 API change, dnl # 'sget' now takes the mount flags as an argument. 
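Checks that must verify a symbol is actually exported, rather than merely declared, convert to ZFS_LINUX_TEST_RESULT_SYMBOL instead of ZFS_LINUX_TEST_RESULT, as setattr_prepare() above does: the source registration is unchanged, and the result macro additionally takes the symbol name and the file expected to export it. A sketch with a hypothetical exported helper (the names and exporting file are illustrative only):

	AC_DEFUN([ZFS_AC_KERNEL_FROBNICATE], [
		AC_MSG_CHECKING([whether frobnicate() is available])
		ZFS_LINUX_TEST_RESULT_SYMBOL([frobnicate],
		    [frobnicate], [fs/inode.c], [
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_FROBNICATE, 1,
			    [frobnicate() is available])
		],[
			AC_MSG_RESULT(no)
		])
	])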
dnl # -AC_DEFUN([ZFS_AC_KERNEL_5ARG_SGET], - [AC_MSG_CHECKING([whether sget() wants 5 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SGET], [ + ZFS_LINUX_TEST_SRC([sget_5args], [ #include ],[ struct file_system_type *type = NULL; @@ -13,11 +12,15 @@ AC_DEFUN([ZFS_AC_KERNEL_5ARG_SGET], int flags = 0; void *data = NULL; (void) sget(type, test, set, flags, data); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SGET], [ + AC_MSG_CHECKING([whether sget() wants 5 args]) + ZFS_LINUX_TEST_RESULT([sget_5args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_5ARG_SGET, 1, [sget() wants 5 args]) ],[ AC_MSG_RESULT(no) ]) ]) - diff --git a/config/kernel-show-options.m4 b/config/kernel-show-options.m4 index 67d683c55e3a..9e426bc39176 100644 --- a/config/kernel-show-options.m4 +++ b/config/kernel-show-options.m4 @@ -1,21 +1,26 @@ dnl # dnl # Linux 3.3 API dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHOW_OPTIONS], [ - AC_MSG_CHECKING([whether sops->show_options() wants dentry]) - - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHOW_OPTIONS], [ + ZFS_LINUX_TEST_SRC([super_operations_show_options], [ #include - int show_options (struct seq_file * x, struct dentry * y) { return 0; }; + int show_options(struct seq_file * x, struct dentry * y) { + return 0; + }; + static struct super_operations sops __attribute__ ((unused)) = { .show_options = show_options, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHOW_OPTIONS], [ + AC_MSG_CHECKING([whether sops->show_options() wants dentry]) + ZFS_LINUX_TEST_RESULT([super_operations_show_options], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_SHOW_OPTIONS_WITH_DENTRY, 1, - [sops->show_options() with dentry]) + [sops->show_options() with dentry]) ],[ AC_MSG_RESULT([no]) ]) diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 37da0ec721aa..45b4b5d4b2c8 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -4,9 +4,8 @@ dnl # The super_block structure now stores a per-filesystem shrinker. dnl # This interface is preferable because it can be used to specifically dnl # target only the zfs filesystem for pruning. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHRINK], [ - AC_MSG_CHECKING([whether super_block has s_shrink]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ + ZFS_LINUX_TEST_SRC([super_block_s_shrink], [ #include int shrink(struct shrinker *s, struct shrink_control *sc) @@ -18,8 +17,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK], [ .s_shrink.seeks = DEFAULT_SEEKS, .s_shrink.batch = 0, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ + AC_MSG_CHECKING([whether super_block has s_shrink]) + ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SHRINK, 1, [struct super_block has s_shrink]) @@ -50,15 +53,18 @@ dnl # a list_head is used. Then to prevent the spinning from occurring dnl # the .next pointer is set to the fs_supers list_head which ensures dnl # the iterate_supers_type() function will always terminate. 
dnl # -AC_DEFUN([ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD], [ - AC_MSG_CHECKING([whether super_block has s_instances list_head]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_INSTANCES_LIST_HEAD], [ + ZFS_LINUX_TEST_SRC([super_block_s_instances_list_head], [ #include ],[ struct super_block sb __attribute__ ((unused)); - INIT_LIST_HEAD(&sb.s_instances); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_INSTANCES_LIST_HEAD], [ + AC_MSG_CHECKING([whether super_block has s_instances list_head]) + ZFS_LINUX_TEST_RESULT([super_block_s_instances_list_head], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_S_INSTANCES_LIST_HEAD, 1, [struct super_block has s_instances list_head]) @@ -67,9 +73,8 @@ AC_DEFUN([ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [ - AC_MSG_CHECKING([whether sops->nr_cached_objects() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_NR_CACHED_OBJECTS], [ + ZFS_LINUX_TEST_SRC([nr_cached_objects], [ #include int nr_cached_objects(struct super_block *sb) { return 0; } @@ -78,19 +83,22 @@ AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [ sops __attribute__ ((unused)) = { .nr_cached_objects = nr_cached_objects, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [ + AC_MSG_CHECKING([whether sops->nr_cached_objects() exists]) + ZFS_LINUX_TEST_RESULT([nr_cached_objects], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_NR_CACHED_OBJECTS, 1, - [sops->nr_cached_objects() exists]) + [sops->nr_cached_objects() exists]) ],[ AC_MSG_RESULT(no) ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [ - AC_MSG_CHECKING([whether sops->free_cached_objects() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_FREE_CACHED_OBJECTS], [ + ZFS_LINUX_TEST_SRC([free_cached_objects], [ #include void free_cached_objects(struct super_block *sb, int x) @@ -100,11 +108,15 @@ AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [ sops __attribute__ ((unused)) = { .free_cached_objects = free_cached_objects, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [ + AC_MSG_CHECKING([whether sops->free_cached_objects() exists]) + ZFS_LINUX_TEST_RESULT([free_cached_objects], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FREE_CACHED_OBJECTS, 1, - [sops->free_cached_objects() exists]) + [sops->free_cached_objects() exists]) ],[ AC_MSG_RESULT(no) ]) @@ -115,15 +127,19 @@ dnl # 3.12 API change dnl # The nid member was added to struct shrink_control to support dnl # NUMA-aware shrinkers. 
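The resulting SHRINK_CONTROL_HAS_NID define is then available when populating a shrink_control request; a minimal sketch (function name illustrative, node 0 chosen arbitrarily):

    #include <linux/fs.h>

    static void
    zfs_prune_sketch(unsigned long nr_to_scan)
    {
            struct shrink_control sc = {
                    .nr_to_scan = nr_to_scan,
                    .gfp_mask = GFP_KERNEL,
            };

    #ifdef SHRINK_CONTROL_HAS_NID
            /* 3.12+: NUMA-aware shrinkers may target a single node */
            sc.nid = 0;
    #endif
            (void) sc;
    }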
dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ - AC_MSG_CHECKING([whether shrink_control has nid]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID], [ + ZFS_LINUX_TEST_SRC([shrink_control_nid], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); unsigned long scnidsize __attribute__ ((unused)) = sizeof(sc.nid); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ + AC_MSG_CHECKING([whether shrink_control has nid]) + ZFS_LINUX_TEST_RESULT([shrink_control_nid], [ AC_MSG_RESULT(yes) AC_DEFINE(SHRINK_CONTROL_HAS_NID, 1, [struct shrink_control has nid]) @@ -132,78 +148,96 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ ]) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ + ZFS_LINUX_TEST_SRC([shrinker_cb_2arg], [ + #include + int shrinker_cb(int nr_to_scan, gfp_t gfp_mask) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .shrink = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + register_shrinker(&cache_shrinker); + ]) -AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - dnl # - dnl # 2.6.23 to 2.6.34 API change - dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask) - dnl # - AC_MSG_CHECKING([whether old 2-argument shrinker exists]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([shrinker_cb_3arg], [ #include + int shrinker_cb(struct shrinker *shrink, int nr_to_scan, + gfp_t gfp_mask) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .shrink = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + register_shrinker(&cache_shrinker); + ]) - int shrinker_cb(int nr_to_scan, gfp_t gfp_mask); + ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ + #include + int shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker); + ]) + + ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ + #include + unsigned long shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } ],[ + struct shrinker cache_shrinker = { + .count_objects = shrinker_cb, + .scan_objects = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + register_shrinker(&cache_shrinker); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ + dnl # + dnl # 2.6.23 to 2.6.34 API change + dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask) + dnl # + AC_MSG_CHECKING([whether old 2-argument shrinker exists]) + ZFS_LINUX_TEST_RESULT([shrinker_cb_2arg], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_OLD_SHRINKER_CALLBACK, 1, - [old shrinker callback wants 2 args]) + [old shrinker callback wants 2 args]) ],[ AC_MSG_RESULT(no) + dnl # dnl # 2.6.35 - 2.6.39 API change dnl # ->shrink(struct shrinker *, dnl # int nr_to_scan, gfp_t gfp_mask) dnl # AC_MSG_CHECKING([whether old 3-argument shrinker exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int shrinker_cb(struct shrinker *, int nr_to_scan, - gfp_t gfp_mask); - ],[ - struct shrinker cache_shrinker = { - .shrink = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ],[ + ZFS_LINUX_TEST_RESULT([shrinker_cb_3arg], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_3ARGS_SHRINKER_CALLBACK, 1, [old shrinker callback wants 3 args]) ],[ AC_MSG_RESULT(no) + dnl # dnl # 3.0 - 3.11 API change dnl # ->shrink(struct shrinker *, dnl # struct shrink_control *sc) dnl # AC_MSG_CHECKING( - [whether new 2-argument shrinker exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int shrinker_cb(struct shrinker *, - struct 
shrink_control *sc); - ],[ - struct shrinker cache_shrinker = { - .shrink = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ],[ + [whether new 2-argument shrinker exists]) + ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_NEW_SHRINKER_CALLBACK, 1, [new shrinker callback wants 2 args]) ],[ AC_MSG_RESULT(no) + dnl # dnl # 3.12 API change, dnl # ->shrink() is logically split into @@ -211,50 +245,61 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # AC_MSG_CHECKING( [whether ->count_objects callback exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - - unsigned long shrinker_cb( - struct shrinker *, - struct shrink_control *sc); - ],[ - struct shrinker cache_shrinker = { - .count_objects = shrinker_cb, - .scan_objects = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ],[ + ZFS_LINUX_TEST_RESULT( + [shrinker_cb_shrink_control_split], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, [->count_objects exists]) ],[ - AC_MSG_ERROR(error) + ZFS_LINUX_TEST_ERROR([shrinker]) ]) ]) ]) ]) - EXTRA_KCFLAGS="$tmp_flags" ]) dnl # dnl # 2.6.39 API change, dnl # Shrinker adjusted to use common shrink_control structure. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ - AC_MSG_CHECKING([whether struct shrink_control exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT], [ + ZFS_LINUX_TEST_SRC([shrink_control_struct], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); sc.nr_to_scan = 0; sc.gfp_mask = GFP_KERNEL; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ + AC_MSG_CHECKING([whether struct shrink_control exists]) + ZFS_LINUX_TEST_RESULT([shrink_control_struct], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1, - [struct shrink_control exists]) + [struct shrink_control exists]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_INSTANCES_LIST_HEAD + ZFS_AC_KERNEL_SRC_NR_CACHED_OBJECTS + ZFS_AC_KERNEL_SRC_FREE_CACHED_OBJECTS + ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID + ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK + ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT +]) + +AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ + ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK + ZFS_AC_KERNEL_SUPER_BLOCK_S_INSTANCES_LIST_HEAD + ZFS_AC_KERNEL_NR_CACHED_OBJECTS + ZFS_AC_KERNEL_FREE_CACHED_OBJECTS + ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID + ZFS_AC_KERNEL_SHRINKER_CALLBACK + ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT +]) diff --git a/config/kernel-spinlock.m4 b/config/kernel-spinlock.m4 deleted file mode 100644 index d6d6640070b5..000000000000 --- a/config/kernel-spinlock.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 2.6.36 API change, -dnl # The 'struct fs_struct->lock' was changed from a rwlock_t to -dnl # a spinlock_t to improve the fastpath performance.
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK], [ - AC_MSG_CHECKING([whether struct fs_struct uses spinlock_t]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - static struct fs_struct fs; - spin_lock_init(&fs.lock); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FS_STRUCT_SPINLOCK, 1, - [struct fs_struct uses spinlock_t]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-submit_bio.m4 b/config/kernel-submit_bio.m4 index da5f85ca72cb..cf80e9b83e39 100644 --- a/config/kernel-submit_bio.m4 +++ b/config/kernel-submit_bio.m4 @@ -3,15 +3,19 @@ dnl # 4.8 API change dnl # The rw argument has been removed from submit_bio/submit_bio_wait. dnl # Callers are now expected to set bio->bi_rw instead of passing it in. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SUBMIT_BIO], [ - AC_MSG_CHECKING([whether submit_bio() wants 1 arg]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUBMIT_BIO], [ + ZFS_LINUX_TEST_SRC([submit_bio], [ #include ],[ blk_qc_t blk_qc; struct bio *bio = NULL; blk_qc = submit_bio(bio); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUBMIT_BIO], [ + AC_MSG_CHECKING([whether submit_bio() wants 1 arg]) + ZFS_LINUX_TEST_RESULT([submit_bio], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_1ARG_SUBMIT_BIO, 1, [submit_bio() wants 1 arg]) ],[ diff --git a/config/kernel-super-userns.m4 b/config/kernel-super-userns.m4 index de94ad967ac3..1ad35f2d19ba 100644 --- a/config/kernel-super-userns.m4 +++ b/config/kernel-super-userns.m4 @@ -3,15 +3,19 @@ dnl # 4.8 API change dnl # struct user_namespace was added to struct super_block as dnl # super->s_user_ns member dnl # -AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [ - AC_MSG_CHECKING([whether super_block->s_user_ns exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_USER_NS], [ + ZFS_LINUX_TEST_SRC([super_user_ns], [ #include #include - ],[ + ], [ struct super_block super; super.s_user_ns = (struct user_namespace *)NULL; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [ + AC_MSG_CHECKING([whether super_block->s_user_ns exists]) + ZFS_LINUX_TEST_RESULT([super_user_ns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_USER_NS, 1, [super_block->s_user_ns exists]) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index b0e1afa153ab..403cff3f4189 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -6,15 +6,11 @@ dnl # (older kernels). Also sanity check the from_timer() and timer_setup() dnl # macros are available as well, since they will be used in the same newer dnl # kernels that support the new timer_list.func signature. dnl # -dnl # Also check for the existance of flags in struct timer_list, they were +dnl # Also check for the existence of flags in struct timer_list, they were dnl # added in 4.1-rc8 via 0eeda71bc30d. 
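On the C side these timer results gate how a timer is declared and armed; a minimal sketch assuming HAVE_KERNEL_TIMER_SETUP (type and function names are illustrative):

    #include <linux/timer.h>

    typedef struct zfs_timer_sketch {
            struct timer_list zt_timer;
            int zt_fired;
    } zfs_timer_sketch_t;

    static void
    zfs_timer_expire(struct timer_list *tl)
    {
            zfs_timer_sketch_t *zt = from_timer(zt, tl, zt_timer);
            zt->zt_fired = 1;
    }

    static void
    zfs_timer_arm(zfs_timer_sketch_t *zt)
    {
    #ifdef HAVE_KERNEL_TIMER_SETUP
            /* 4.15+: the callback receives the timer_list itself */
            timer_setup(&zt->zt_timer, zfs_timer_expire, 0);
    #else
            /* Older kernels would use setup_timer() with an unsigned
             * long cookie and a different callback signature (not shown). */
    #endif
            mod_timer(&zt->zt_timer, jiffies + HZ);
    }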
- -AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ - AC_MSG_CHECKING([whether timer_setup() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - - ZFS_LINUX_TRY_COMPILE([ +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ + ZFS_LINUX_TEST_SRC([timer_setup], [ #include struct my_task_timer { @@ -24,13 +20,34 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ void task_expire(struct timer_list *tl) { - struct my_task_timer *task_timer = from_timer(task_timer, tl, timer); + struct my_task_timer *task_timer = + from_timer(task_timer, tl, timer); task_timer->data = 42; } ],[ struct my_task_timer task_timer; timer_setup(&task_timer.timer, task_expire, 0); + ]) + + ZFS_LINUX_TEST_SRC([timer_list_function], [ + #include + void task_expire(struct timer_list *tl) {} ],[ + struct timer_list tl; + tl.function = task_expire; + ]) + + ZFS_LINUX_TEST_SRC([timer_list_flags], [ + #include + ],[ + struct timer_list tl; + tl.flags = 2; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ + AC_MSG_CHECKING([whether timer_setup() is available]) + ZFS_LINUX_TEST_RESULT([timer_setup], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_SETUP, 1, [timer_setup() is available]) @@ -39,14 +56,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ ]) AC_MSG_CHECKING([whether timer function expects timer_list]) - - ZFS_LINUX_TRY_COMPILE([ - #include - void task_expire(struct timer_list *tl) {} - ],[ - struct timer_list tl; - tl.function = task_expire; - ],[ + ZFS_LINUX_TEST_RESULT([timer_list_function], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1, [timer_list.function gets a timer_list]) @@ -55,19 +65,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ ]) AC_MSG_CHECKING([whether struct timer_list has flags]) - - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - struct timer_list tl; - tl.flags = 2; - ],[ + ZFS_LINUX_TEST_RESULT([timer_list_flags], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_LIST_FLAGS, 1, [struct timer_list has a flags member]) ],[ AC_MSG_RESULT(no) ]) - - EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 index 5aad90450e8b..f510bfe6ba03 100644 --- a/config/kernel-tmpfile.m4 +++ b/config/kernel-tmpfile.m4 @@ -2,9 +2,8 @@ dnl # dnl # 3.11 API change dnl # Add support for i_op->tmpfile dnl # -AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ - AC_MSG_CHECKING([whether i_op->tmpfile() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ + ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [ #include int tmpfile(struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } @@ -12,11 +11,14 @@ AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ iops __attribute__ ((unused)) = { .tmpfile = tmpfile, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ + AC_MSG_CHECKING([whether i_op->tmpfile() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, - [i_op->tmpfile() exists]) + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-totalhigh_pages.m4 b/config/kernel-totalhigh_pages.m4 index b22e86d4dbc4..4ecb03a50a51 100644 --- a/config/kernel-totalhigh_pages.m4 +++ b/config/kernel-totalhigh_pages.m4 @@ -1,16 +1,18 @@ dnl # dnl # 5.0 API change dnl # -dnl # ca79b0c211af mm: convert totalram_pages and totalhigh_pages variables to atomic -dnl # -AC_DEFUN([ZFS_AC_KERNEL_TOTALHIGH_PAGES], [ - AC_MSG_CHECKING([whether totalhigh_pages() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES], [ + 
ZFS_LINUX_TEST_SRC([totalhigh_pages], [ #include ],[ unsigned long pages __attribute__ ((unused)); pages = totalhigh_pages(); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TOTALHIGH_PAGES], [ + AC_MSG_CHECKING([whether totalhigh_pages() exists]) + ZFS_LINUX_TEST_RESULT([totalhigh_pages], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_TOTALHIGH_PAGES, 1, [totalhigh_pages() exists]) ],[ diff --git a/config/kernel-totalram-pages-func.m4 b/config/kernel-totalram-pages-func.m4 index a6eac6454310..d0e812a8d2d2 100644 --- a/config/kernel-totalram-pages-func.m4 +++ b/config/kernel-totalram-pages-func.m4 @@ -2,16 +2,21 @@ dnl # dnl # Linux 5.0: totalram_pages is no longer a global variable, and must be dnl # read via the totalram_pages() helper function. dnl # -AC_DEFUN([ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC], [ - AC_MSG_CHECKING([whether totalram_pages() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC], [ + ZFS_LINUX_TEST_SRC([totalram_pages], [ #include ],[ unsigned long pages __attribute__ ((unused)); pages = totalram_pages(); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC], [ + AC_MSG_CHECKING([whether totalram_pages() exists]) + ZFS_LINUX_TEST_RESULT([totalram_pages], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TOTALRAM_PAGES_FUNC, 1, [kernel has totalram_pages()]) + AC_DEFINE(HAVE_TOTALRAM_PAGES_FUNC, 1, + [kernel has totalram_pages()]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-truncate-range.m4 b/config/kernel-truncate-range.m4 index da2cb50fcbcc..8fdbb10869b0 100644 --- a/config/kernel-truncate-range.m4 +++ b/config/kernel-truncate-range.m4 @@ -4,17 +4,20 @@ dnl # torvalds/linux@17cf28afea2a1112f240a3a2da8af883be024811 removed dnl # truncate_range(). The file hole punching functionality is now dnl # provided by fallocate() dnl # -AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_RANGE], [ - AC_MSG_CHECKING([whether iops->truncate_range() exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TRUNCATE_RANGE], [ + ZFS_LINUX_TEST_SRC([inode_operations_truncate_range], [ #include void truncate_range(struct inode *inode, loff_t start, loff_t end) { return; } static struct inode_operations iops __attribute__ ((unused)) = { .truncate_range = truncate_range, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_RANGE], [ + AC_MSG_CHECKING([whether iops->truncate_range() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_truncate_range], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_INODE_TRUNCATE_RANGE, 1, [iops->truncate_range() exists]) diff --git a/config/kernel-truncate-setsize.m4 b/config/kernel-truncate-setsize.m4 index 7e4aff479a90..e719c1444ab0 100644 --- a/config/kernel-truncate-setsize.m4 +++ b/config/kernel-truncate-setsize.m4 @@ -2,16 +2,21 @@ dnl # dnl # 2.6.35 API change dnl # Added truncate_setsize() helper function. 
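Consumers typically wrap the truncate_setsize() result like this; a minimal sketch (the fallback shown is a plausible pre-2.6.35 equivalent, not code from this patch):

    #include <linux/fs.h>
    #include <linux/mm.h>

    static void
    zfs_setsize_sketch(struct inode *ip, loff_t size)
    {
    #ifdef HAVE_TRUNCATE_SETSIZE
            /* 2.6.35+: one helper sets i_size and trims the page cache */
            truncate_setsize(ip, size);
    #else
            i_size_write(ip, size);
            truncate_inode_pages(&ip->i_data, size);
    #endif
    }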
dnl # -AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_SETSIZE], - [AC_MSG_CHECKING([whether truncate_setsize() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE], [ + ZFS_LINUX_TEST_SRC([truncate_setsize], [ #include ], [ truncate_setsize(NULL, 0); - ], [truncate_setsize], [mm/truncate.c], [ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_SETSIZE], [ + AC_MSG_CHECKING([whether truncate_setsize() is available]) + ZFS_LINUX_TEST_RESULT_SYMBOL([truncate_setsize], + [truncate_setsize], [mm/truncate.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_TRUNCATE_SETSIZE, 1, - [truncate_setsize() is available]) + [truncate_setsize() is available]) ], [ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-userns-capabilities.m4 b/config/kernel-userns-capabilities.m4 index fa3381978bfa..5dcbc03d3b13 100644 --- a/config/kernel-userns-capabilities.m4 +++ b/config/kernel-userns-capabilities.m4 @@ -2,16 +2,19 @@ dnl # dnl # 2.6.38 API change dnl # ns_capable() was introduced dnl # -AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ - AC_MSG_CHECKING([whether ns_capable exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_NS_CAPABLE], [ + ZFS_LINUX_TEST_SRC([ns_capable], [ #include ],[ ns_capable((struct user_namespace *)NULL, CAP_SYS_ADMIN); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ + AC_MSG_CHECKING([whether ns_capable exists]) + ZFS_LINUX_TEST_RESULT([ns_capable], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_NS_CAPABLE, 1, - [ns_capable exists]) + AC_DEFINE(HAVE_NS_CAPABLE, 1, [ns_capable exists]) ],[ AC_MSG_RESULT(no) ]) @@ -23,17 +26,20 @@ dnl # struct user_namespace was added to struct cred_t as dnl # cred->user_ns member dnl # Note that current_user_ns() was added in 2.6.28. dnl # -AC_DEFUN([ZFS_AC_KERNEL_CRED_USER_NS], [ - AC_MSG_CHECKING([whether cred_t->user_ns exists]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CRED_USER_NS], [ + ZFS_LINUX_TEST_SRC([cred_user_ns], [ #include ],[ struct cred cr; cr.user_ns = (struct user_namespace *)NULL; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CRED_USER_NS], [ + AC_MSG_CHECKING([whether cred_t->user_ns exists]) + ZFS_LINUX_TEST_RESULT([cred_user_ns], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CRED_USER_NS, 1, - [cred_t->user_ns exists]) + AC_DEFINE(HAVE_CRED_USER_NS, 1, [cred_t->user_ns exists]) ],[ AC_MSG_RESULT(no) ]) @@ -44,14 +50,18 @@ dnl # 3.4 API change dnl # kuid_has_mapping() and kgid_has_mapping() were added to distinguish dnl # between internal kernel uids/gids and user namespace uids/gids. 
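A short sketch of how HAVE_KUID_HAS_MAPPING is typically consumed (the function name is illustrative):

    #include <linux/uidgid.h>

    static int
    zfs_ids_visible(struct user_namespace *ns, kuid_t uid, kgid_t gid)
    {
    #ifdef HAVE_KUID_HAS_MAPPING
            /* 3.4+: refuse ids with no mapping in this namespace */
            if (!kuid_has_mapping(ns, uid) || !kgid_has_mapping(ns, gid))
                    return (-EPERM);
    #endif
            return (0);
    }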
dnl # -AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ - AC_MSG_CHECKING([whether kuid_has_mapping/kgid_has_mapping exist]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING], [ + ZFS_LINUX_TEST_SRC([kuid_has_mapping], [ #include ],[ kuid_has_mapping((struct user_namespace *)NULL, KUIDT_INIT(0)); kgid_has_mapping((struct user_namespace *)NULL, KGIDT_INIT(0)); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ + AC_MSG_CHECKING([whether kuid_has_mapping/kgid_has_mapping exist]) + ZFS_LINUX_TEST_RESULT([kuid_has_mapping], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KUID_HAS_MAPPING, 1, [kuid_has_mapping/kgid_has_mapping exist]) @@ -60,6 +70,12 @@ AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ ]) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES], [ + ZFS_AC_KERNEL_SRC_NS_CAPABLE + ZFS_AC_KERNEL_SRC_CRED_USER_NS + ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING +]) + AC_DEFUN([ZFS_AC_KERNEL_USERNS_CAPABILITIES], [ ZFS_AC_KERNEL_NS_CAPABLE ZFS_AC_KERNEL_CRED_USER_NS diff --git a/config/kernel-urange-sleep.m4 b/config/kernel-usleep_range.m4 similarity index 60% rename from config/kernel-urange-sleep.m4 rename to config/kernel-usleep_range.m4 index b5764de3ed62..5bf051ab4fae 100644 --- a/config/kernel-urange-sleep.m4 +++ b/config/kernel-usleep_range.m4 @@ -1,20 +1,23 @@ dnl # -dnl # 2.6.36 API compatibility. -dnl # Added usleep_range timer. +dnl # 2.6.36 API compatibility - Added usleep_range timer. +dnl # dnl # usleep_range is a finer precision implementation of msleep dnl # designed to be a drop-in replacement for udelay where a precise dnl # sleep / busy-wait is unnecessary. dnl # -AC_DEFUN([ZFS_AC_KERNEL_USLEEP_RANGE], [ - AC_MSG_CHECKING([whether usleep_range() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_USLEEP_RANGE], [ + ZFS_LINUX_TEST_SRC([usleep_range], [ #include ],[ usleep_range(0, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_USLEEP_RANGE], [ + AC_MSG_CHECKING([whether usleep_range() is available]) + ZFS_LINUX_TEST_RESULT([usleep_range], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_USLEEP_RANGE, 1, - [usleep_range is available]) + AC_DEFINE(HAVE_USLEEP_RANGE, 1, [usleep_range is available]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-vfs-direct_IO.m4 b/config/kernel-vfs-direct_IO.m4 index cc50bfbe4e73..82583d52fcbc 100644 --- a/config/kernel-vfs-direct_IO.m4 +++ b/config/kernel-vfs-direct_IO.m4 @@ -1,9 +1,8 @@ dnl # -dnl # Linux 4.6.x API change +dnl # Check for direct IO interfaces.
dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER], [ - AC_MSG_CHECKING([whether aops->direct_IO() uses iov_iter]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ + ZFS_LINUX_TEST_SRC([direct_io_iter], [ #include ssize_t test_direct_IO(struct kiocb *kiocb, @@ -13,24 +12,9 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER, 1, - [aops->direct_IO() uses iov_iter without rw]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) -]) + ],[]) -dnl # -dnl # Linux 4.1.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_OFFSET], [ - AC_MSG_CHECKING( - [whether aops->direct_IO() uses iov_iter with offset]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([direct_io_iter_offset], [ #include ssize_t test_direct_IO(struct kiocb *kiocb, @@ -40,24 +24,9 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_OFFSET], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_OFFSET, 1, - [aops->direct_IO() uses iov_iter with offset]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) -]) + ],[]) -dnl # -dnl # Linux 3.16.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_RW_OFFSET], [ - AC_MSG_CHECKING( - [whether aops->direct_IO() uses iov_iter with rw and offset]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([direct_io_iter_rw_offset], [ #include ssize_t test_direct_IO(int rw, struct kiocb *kiocb, @@ -67,23 +36,9 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_RW_OFFSET], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET, 1, - [aops->direct_IO() uses iov_iter with rw and offset]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) -]) + ],[]) -dnl # -dnl # Ancient Linux API (predates git) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_IOVEC], [ - AC_MSG_CHECKING([whether aops->direct_IO() uses iovec]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([direct_io_iovec], [ #include ssize_t test_direct_IO(int rw, struct kiocb *kiocb, @@ -94,37 +49,61 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO_IOVEC], [ aops __attribute__ ((unused)) = { .direct_IO = test_direct_IO, }; - ],[ - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1, - [aops->direct_IO() uses iovec]) - zfs_ac_direct_io="yes" - ],[ - AC_MSG_RESULT([no]) - ]) + ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ - zfs_ac_direct_io="no" - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER - fi - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_OFFSET - fi - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_ITER_RW_OFFSET - fi - - if test "$zfs_ac_direct_io" = "no"; then - ZFS_AC_KERNEL_VFS_DIRECT_IO_IOVEC - fi + dnl # + dnl # Linux 4.6.x API change + dnl # + AC_MSG_CHECKING([whether aops->direct_IO() uses iov_iter]) + ZFS_LINUX_TEST_RESULT([direct_io_iter], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER, 1, + [aops->direct_IO() uses iov_iter without rw]) + ],[ + AC_MSG_RESULT([no]) - if test "$zfs_ac_direct_io" = "no"; then - AC_MSG_ERROR([no; unknown direct IO interface]) - fi + dnl # + dnl # Linux 4.1.x API change + dnl # + AC_MSG_CHECKING( + [whether aops->direct_IO() uses offset]) + ZFS_LINUX_TEST_RESULT([direct_io_iter_offset], [ + AC_MSG_RESULT([yes]) + 
AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_OFFSET, 1, + [aops->direct_IO() uses iov_iter with offset]) + + ],[ + AC_MSG_RESULT([no]) + + dnl # + dnl # Linux 3.16.x API change + dnl # + AC_MSG_CHECKING( + [whether aops->direct_IO() uses rw and offset]) + ZFS_LINUX_TEST_RESULT([direct_io_iter_rw_offset], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET, 1, + [aops->direct_IO() uses iov_iter with ] + [rw and offset]) + ],[ + AC_MSG_RESULT([no]) + + dnl # + dnl # Ancient Linux API (predates git) + dnl # + AC_MSG_CHECKING( + [whether aops->direct_IO() uses iovec]) + ZFS_LINUX_TEST_RESULT([direct_io_iovec], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1, + [aops->direct_IO() uses iovec]) + ],[ + ZFS_LINUX_TEST_ERROR([direct IO]) + AC_MSG_RESULT([no]) + ]) + ]) + ]) + ]) ]) diff --git a/config/kernel-vfs-fsync.m4 b/config/kernel-vfs-fsync.m4 index a474f9f1745d..18a60d29aaed 100644 --- a/config/kernel-vfs-fsync.m4 +++ b/config/kernel-vfs-fsync.m4 @@ -2,13 +2,17 @@ dnl # dnl # 2.6.35 API change, dnl # Unused 'struct dentry *' removed from vfs_fsync() prototype. dnl # -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_VFS_FSYNC], [ - AC_MSG_CHECKING([whether vfs_fsync() wants 2 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_fsync_2args], [ #include ],[ vfs_fsync(NULL, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_FSYNC_2ARGS], [ + AC_MSG_CHECKING([whether vfs_fsync() wants 2 args]) + ZFS_LINUX_TEST_RESULT([vfs_fsync_2args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_VFS_FSYNC, 1, [vfs_fsync() wants 2 args]) ],[ diff --git a/config/kernel-vfs-getattr.m4 b/config/kernel-vfs-getattr.m4 index b13723538f15..eb07853cc4b9 100644 --- a/config/kernel-vfs-getattr.m4 +++ b/config/kernel-vfs-getattr.m4 @@ -2,19 +2,23 @@ dnl # dnl # 4.11 API, a528d35e@torvalds/linux dnl # vfs_getattr(const struct path *p, struct kstat *s, u32 m, unsigned int f) dnl # -AC_DEFUN([ZFS_AC_KERNEL_4ARGS_VFS_GETATTR], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 4 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_getattr_4args], [ #include ],[ vfs_getattr((const struct path *)NULL, (struct kstat *)NULL, (u32)0, (unsigned int)0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_4ARGS], [ + AC_MSG_CHECKING([whether vfs_getattr() wants 4 args]) + ZFS_LINUX_TEST_RESULT([vfs_getattr_4args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_4ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 4 args]) + [vfs_getattr wants 4 args]) ],[ AC_MSG_RESULT(no) ]) @@ -24,17 +28,21 @@ dnl # dnl # 3.9 API dnl # vfs_getattr(struct path *p, struct kstat *s) dnl # -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_VFS_GETATTR], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 2 args]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_getattr_2args], [ #include ],[ vfs_getattr((struct path *) NULL, (struct kstat *)NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_2ARGS], [ + AC_MSG_CHECKING([whether vfs_getattr() wants 2 args]) + ZFS_LINUX_TEST_RESULT([vfs_getattr_2args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_2ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 2 args]) + [vfs_getattr wants 2 args]) ],[ AC_MSG_RESULT(no) ]) @@ -44,19 +52,35 @@ dnl # dnl # <3.9 API dnl # vfs_getattr(struct vfsmount *v, struct dentry *d, struct kstat *k) dnl # -AC_DEFUN([ZFS_AC_KERNEL_3ARGS_VFS_GETATTR], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 3 args]) - ZFS_LINUX_TRY_COMPILE([ 
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS], [ + ZFS_LINUX_TEST_SRC([vfs_getattr_3args], [ #include ],[ vfs_getattr((struct vfsmount *)NULL, (struct dentry *)NULL, (struct kstat *)NULL); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_3ARGS], [ + AC_MSG_CHECKING([whether vfs_getattr() wants 3 args]) + ZFS_LINUX_TEST_RESULT([vfs_getattr_3args], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_3ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 3 args]) + [vfs_getattr wants 3 args]) ],[ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR], [ + ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS + ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS + ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR], [ + ZFS_AC_KERNEL_VFS_GETATTR_4ARGS + ZFS_AC_KERNEL_VFS_GETATTR_2ARGS + ZFS_AC_KERNEL_VFS_GETATTR_3ARGS +]) diff --git a/config/kernel-vfs-iterate.m4 b/config/kernel-vfs-iterate.m4 index 5de901d4462e..172118eac87b 100644 --- a/config/kernel-vfs-iterate.m4 +++ b/config/kernel-vfs-iterate.m4 @@ -1,9 +1,5 @@ -AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ - dnl # - dnl # 4.7 API change - dnl # - AC_MSG_CHECKING([whether fops->iterate_shared() is available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ + ZFS_LINUX_TEST_SRC([file_operations_iterate_shared], [ #include int iterate(struct file *filp, struct dir_context * context) { return 0; } @@ -12,11 +8,44 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ __attribute__ ((unused)) = { .iterate_shared = iterate, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([file_operations_iterate], [ + #include + int iterate(struct file *filp, + struct dir_context *context) { return 0; } + + static const struct file_operations fops + __attribute__ ((unused)) = { + .iterate = iterate, + }; + + #if defined(FMODE_KABI_ITERATE) + #error "RHEL 7.5, FMODE_KABI_ITERATE interface" + #endif + ],[]) + + ZFS_LINUX_TEST_SRC([file_operations_readdir], [ + #include + int readdir(struct file *filp, void *entry, + filldir_t func) { return 0; } + + static const struct file_operations fops + __attribute__ ((unused)) = { + .readdir = readdir, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ + dnl # + dnl # 4.7 API change + dnl # + AC_MSG_CHECKING([whether fops->iterate_shared() is available]) + ZFS_LINUX_TEST_RESULT([file_operations_iterate_shared], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_VFS_ITERATE_SHARED, 1, - [fops->iterate_shared() is available]) + [fops->iterate_shared() is available]) ],[ AC_MSG_RESULT(no) @@ -31,44 +60,23 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ dnl # to using fops.readdir() to retain KABI compatibility. 
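The three directory-iteration results collapse into a single file_operations table on the C side; a hedged sketch (zpl_iterate/zpl_readdir are stand-ins, not definitions from this patch):

    #include <linux/fs.h>

    #if defined(HAVE_VFS_ITERATE_SHARED) || defined(HAVE_VFS_ITERATE)
    static int zpl_iterate(struct file *, struct dir_context *);
    #else
    static int zpl_readdir(struct file *, void *, filldir_t);
    #endif

    static const struct file_operations zpl_dir_fops_sketch = {
    #if defined(HAVE_VFS_ITERATE_SHARED)
            .iterate_shared = zpl_iterate,
    #elif defined(HAVE_VFS_ITERATE)
            .iterate = zpl_iterate,
    #else
            .readdir = zpl_readdir,
    #endif
    };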
dnl # AC_MSG_CHECKING([whether fops->iterate() is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - int iterate(struct file *filp, - struct dir_context *context) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .iterate = iterate, - }; - - #if defined(FMODE_KABI_ITERATE) - #error "RHEL 7.5, FMODE_KABI_ITERATE interface" - #endif - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([file_operations_iterate], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_VFS_ITERATE, 1, - [fops->iterate() is available]) + [fops->iterate() is available]) ],[ AC_MSG_RESULT(no) + dnl # + dnl # readdir interface introduced + dnl # AC_MSG_CHECKING([whether fops->readdir() is available]) - ZFS_LINUX_TRY_COMPILE([ - #include - int readdir(struct file *filp, void *entry, - filldir_t func) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .readdir = readdir, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([file_operations_readdir], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_VFS_READDIR, 1, - [fops->readdir() is available]) + [fops->readdir() is available]) ],[ - AC_MSG_ERROR(no; file a bug report with ZoL) + ZFS_LINUX_TEST_ERROR([vfs_iterate]) ]) ]) ]) diff --git a/config/kernel-vfs-rw-iterate.m4 b/config/kernel-vfs-rw-iterate.m4 index ace54f70711f..000353ec15b0 100644 --- a/config/kernel-vfs-rw-iterate.m4 +++ b/config/kernel-vfs-rw-iterate.m4 @@ -1,9 +1,8 @@ dnl # dnl # Linux 3.16 API dnl # -AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], - [AC_MSG_CHECKING([whether fops->read/write_iter() are available]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE], [ + ZFS_LINUX_TEST_SRC([file_operations_rw], [ #include ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) @@ -16,39 +15,41 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], .read_iter = test_read, .write_iter = test_write, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_RW_ITERATE, 1, - [fops->read/write_iter() are available]) + ],[]) - ZFS_AC_KERNEL_NEW_SYNC_READ + ZFS_LINUX_TEST_SRC([new_sync_rw], [ + #include ],[ - AC_MSG_RESULT(no) + ssize_t ret __attribute__ ((unused)); + struct file *filp = NULL; + char __user *rbuf = NULL; + const char __user *wbuf = NULL; + size_t len = 0; + loff_t ppos; + + ret = new_sync_read(filp, rbuf, len, &ppos); + ret = new_sync_write(filp, wbuf, len, &ppos); ]) ]) -dnl # -dnl # Linux 4.1 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_NEW_SYNC_READ], - [AC_MSG_CHECKING([whether new_sync_read/write() are available]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - ssize_t ret __attribute__ ((unused)); - struct file *filp = NULL; - char __user *rbuf = NULL; - const char __user *wbuf = NULL; - size_t len = 0; - loff_t ppos; - - ret = new_sync_read(filp, rbuf, len, &ppos); - ret = new_sync_write(filp, wbuf, len, &ppos); - ],[ +AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], [ + AC_MSG_CHECKING([whether fops->read/write_iter() are available]) + ZFS_LINUX_TEST_RESULT([file_operations_rw], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_NEW_SYNC_READ, 1, - [new_sync_read()/new_sync_write() are available]) + AC_DEFINE(HAVE_VFS_RW_ITERATE, 1, + [fops->read/write_iter() are available]) + + dnl # + dnl # Linux 4.1 API + dnl # + AC_MSG_CHECKING([whether new_sync_read/write() are available]) + ZFS_LINUX_TEST_RESULT([new_sync_rw], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_NEW_SYNC_READ, 1, + [new_sync_read()/new_sync_write() are available]) + ],[ + AC_MSG_RESULT(no) + ]) ],[ AC_MSG_RESULT(no) ]) @@ -57,19 +58,22 @@ AC_DEFUN([ZFS_AC_KERNEL_NEW_SYNC_READ], dnl # dnl # Linux 4.1.x API dnl # 
-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_WRITE_CHECKS], - [AC_MSG_CHECKING([whether generic_write_checks() takes kiocb]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS], [ + ZFS_LINUX_TEST_SRC([generic_write_checks], [ #include - ],[ struct kiocb *iocb = NULL; struct iov_iter *iov = NULL; generic_write_checks(iocb, iov); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS], [ + AC_MSG_CHECKING([whether generic_write_checks() takes kiocb]) + ZFS_LINUX_TEST_RESULT([generic_write_checks], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GENERIC_WRITE_CHECKS_KIOCB, 1, - [generic_write_checks() takes kiocb]) + [generic_write_checks() takes kiocb]) ],[ AC_MSG_RESULT(no) ]) diff --git a/config/kernel-wait.m4 b/config/kernel-wait.m4 index d6442c1df6fb..0414242bf6d4 100644 --- a/config/kernel-wait.m4 +++ b/config/kernel-wait.m4 @@ -1,3 +1,26 @@ +dnl # +dnl # 4.13 API change +dnl # Renamed struct wait_queue -> struct wait_queue_entry. +dnl # +dnl # N.B. The type check is performed before all other checks +dnl # since ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY depends on +dnl # HAVE_WAIT_QUEUE_ENTRY_T being set in confdefs.h. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T], [ + AC_MSG_CHECKING([whether wait_queue_entry_t exists]) + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + wait_queue_entry_t *entry __attribute__ ((unused)); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1, + [wait_queue_entry_t exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + dnl # dnl # 3.17 API change, dnl # wait_on_bit() no longer requires an action argument. The former @@ -8,34 +31,20 @@ dnl # of just two functions: one which uses io_schedule() and one which just dnl # uses schedule(). This API change was made to consolidate all of those dnl # redundant wait functions. dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_ON_BIT], [ - AC_MSG_CHECKING([whether wait_on_bit() takes an action]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_ON_BIT], [ + ZFS_LINUX_TEST_SRC([wait_on_bit], [ #include ],[ int (*action)(void *) = NULL; wait_on_bit(NULL, 0, action, 0); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes]) - ],[ - AC_MSG_RESULT(no) ]) ]) -dnl # -dnl # 4.13 API change -dnl # Renamed struct wait_queue -> struct wait_queue_entry. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T], [ - AC_MSG_CHECKING([whether wait_queue_entry_t exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - wait_queue_entry_t *entry __attribute__ ((unused)); - ],[ + +AC_DEFUN([ZFS_AC_KERNEL_WAIT_ON_BIT], [ + AC_MSG_CHECKING([whether wait_on_bit() takes an action]) + ZFS_LINUX_TEST_RESULT([wait_on_bit], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1, - [wait_queue_entry_t exists]) + AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes]) ],[ AC_MSG_RESULT(no) ]) @@ -46,9 +55,8 @@ dnl # 4.13 API change dnl # Renamed wait_queue_head::task_list -> wait_queue_head::head dnl # Renamed wait_queue_entry::task_list -> wait_queue_entry::entry dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ - AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY], [ + ZFS_LINUX_TEST_SRC([wait_queue_head_entry], [ #include #ifdef HAVE_WAIT_QUEUE_ENTRY_T @@ -66,7 +74,12 @@ AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ head = &wq_head.head; entry = &wq_entry.entry; - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ + AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist]) + ZFS_LINUX_TEST_RESULT([wait_queue_head_entry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_WAIT_QUEUE_HEAD_ENTRY, 1, [wq_head->head and wq_entry->entry exist]) @@ -74,3 +87,13 @@ AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ AC_MSG_RESULT(no) ]) ]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT], [ + ZFS_AC_KERNEL_SRC_WAIT_ON_BIT + ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY +]) + +AC_DEFUN([ZFS_AC_KERNEL_WAIT], [ + ZFS_AC_KERNEL_WAIT_ON_BIT + ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY +]) diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4 index 0b61b85b1d45..ed84c63902f2 100644 --- a/config/kernel-xattr-handler.m4 +++ b/config/kernel-xattr-handler.m4 @@ -3,9 +3,8 @@ dnl # 2.6.35 API change, dnl # The 'struct xattr_handler' was constified in the generic dnl # super_block structure. dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ - AC_MSG_CHECKING([whether super_block uses const struct xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_CONST_XATTR_HANDLER], [ + ZFS_LINUX_TEST_SRC([const_xattr_handler], [ #include #include @@ -22,11 +21,15 @@ AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ const struct super_block sb __attribute__ ((unused)) = { .s_xattr = xattr_handlers, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ + AC_MSG_CHECKING([whether super_block uses const struct xattr_handler]) + ZFS_LINUX_TEST_RESULT([const_xattr_handler], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_CONST_XATTR_HANDLER, 1, - [super_block uses const struct xattr_handler]) + [super_block uses const struct xattr_handler]) ],[ AC_MSG_RESULT([no]) ]) @@ -38,17 +41,20 @@ dnl # struct xattr_handler added new member "name". dnl # xattr_handler which matches the whole name rather than a prefix should use dnl # "name" instead of "prefix", e.g.
"system.posix_acl_access" dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ - AC_MSG_CHECKING([whether xattr_handler has name]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME], [ + ZFS_LINUX_TEST_SRC([xattr_handler_name], [ #include static const struct xattr_handler xops __attribute__ ((unused)) = { .name = XATTR_NAME_POSIX_ACL_ACCESS, }; - ],[ - ],[ + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ + AC_MSG_CHECKING([whether xattr_handler has name]) + ZFS_LINUX_TEST_RESULT([xattr_handler_name], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_HANDLER_NAME, 1, [xattr_handler has name]) @@ -58,52 +64,65 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ ]) dnl # -dnl # 4.9 API change, -dnl # iops->{set,get,remove}xattr and generic_{set,get,remove}xattr are -dnl # removed. xattr operations will directly go through sb->s_xattr. +dnl # Supported xattr handler get() interfaces checked newest to oldest. dnl # -AC_DEFUN([ZFS_AC_KERNEL_HAVE_GENERIC_SETXATTR], [ - AC_MSG_CHECKING([whether generic_setxattr() exists]) - ZFS_LINUX_TRY_COMPILE([ - #include +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ + ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode], [ #include - static const struct inode_operations - iops __attribute__ ((unused)) = { - .setxattr = generic_setxattr + int get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .get = get, }; - ],[ - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_SETXATTR, 1, - [generic_setxattr() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) + ],[]) -dnl # -dnl # Supported xattr handler get() interfaces checked newest to oldest. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ - dnl # - dnl # 4.7 API change, - dnl # The xattr_handler->get() callback was changed to take both - dnl # dentry and inode. - dnl # - AC_MSG_CHECKING([whether xattr_handler->get() wants both dentry and inode]) - ZFS_LINUX_TRY_COMPILE([ + ZFS_LINUX_TEST_SRC([xattr_handler_get_xattr_handler], [ #include int get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) { return 0; } + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return 0; } static const struct xattr_handler xops __attribute__ ((unused)) = { .get = get, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry], [ + #include + + int get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int handler_flags) + { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .get = get, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_get_inode], [ + #include + + int get(struct inode *ip, const char *name, + void *buffer, size_t size) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .get = get, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ + dnl # + dnl # 4.7 API change, + dnl # The xattr_handler->get() callback was changed to take both + dnl # dentry and inode. 
+ dnl # + AC_MSG_CHECKING([whether xattr_handler->get() wants dentry and inode]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE, 1, [xattr_handler->get() wants both dentry and inode]) @@ -115,69 +134,40 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ dnl # should be accessed by handler->flags. dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->get() wants xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->get() wants xattr_handler]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_xattr_handler], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_HANDLER, 1, [xattr_handler->get() wants xattr_handler]) ],[ dnl # dnl # 2.6.33 API change, - dnl # The xattr_handler->get() callback was changed to take - dnl # a dentry instead of an inode, and a handler_flags - dnl # argument was added. + dnl # The xattr_handler->get() callback was changed + dnl # to take a dentry instead of an inode, and a + dnl # handler_flags argument was added. dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->get() wants dentry]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int handler_flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->get() wants dentry]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_DENTRY, 1, [xattr_handler->get() wants dentry]) ],[ dnl # - dnl # 2.6.32 API + dnl # Legacy 2.6.32 API dnl # AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->get() wants inode]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int get(struct inode *ip, const char *name, - void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT( + [xattr_handler_get_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_GET_INODE, 1, [xattr_handler->get() wants inode]) ],[ - AC_MSG_ERROR([no; please file a bug report]) + ZFS_LINUX_TEST_ERROR([xattr get()]) ]) ]) ]) @@ -187,14 +177,8 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ dnl # dnl # Supported xattr handler set() interfaces checked newest to oldest. dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ - dnl # - dnl # 4.7 API change, - dnl # The xattr_handler->set() callback was changed to take both - dnl # dentry and inode. 
- dnl # - AC_MSG_CHECKING([whether xattr_handler->set() wants both dentry and inode]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ + ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [ #include int set(const struct xattr_handler *handler, @@ -206,8 +190,54 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ xops __attribute__ ((unused)) = { .set = set, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_set_xattr_handler], [ + #include + + int set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) + { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .set = set, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry], [ + #include + + int set(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, + int handler_flags) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .set = set, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_set_inode], [ + #include + + int set(struct inode *ip, const char *name, + const void *buffer, size_t size, int flags) + { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .set = set, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ + dnl # + dnl # 4.7 API change, + dnl # The xattr_handler->set() callback was changed to take both + dnl # dentry and inode. + dnl # + AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode]) + ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1, [xattr_handler->set() wants both dentry and inode]) @@ -219,71 +249,40 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ dnl # should be accessed by handler->flags. dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->set() wants xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->set() wants xattr_handler]) + ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1, [xattr_handler->set() wants xattr_handler]) ],[ dnl # dnl # 2.6.33 API change, - dnl # The xattr_handler->set() callback was changed to take a - dnl # dentry instead of an inode, and a handler_flags - dnl # argument was added. + dnl # The xattr_handler->set() callback was changed + dnl # to take a dentry instead of an inode, and a + dnl # handler_flags argument was added. 
dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether xattr_handler->set() wants dentry]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int set(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[ - ],[ + AC_MSG_CHECKING( + [whether xattr_handler->set() wants dentry]) + ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1, [xattr_handler->set() wants dentry]) ],[ dnl # - dnl # 2.6.32 API + dnl # Legacy 2.6.32 API dnl # AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->set() wants inode]) - ZFS_LINUX_TRY_COMPILE([ - #include - - int set(struct inode *ip, const char *name, - const void *buffer, size_t size, int flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT( + [xattr_handler_set_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_SET_INODE, 1, [xattr_handler->set() wants inode]) ],[ - AC_MSG_ERROR([no; please file a bug report]) + ZFS_LINUX_TEST_ERROR([xattr set()]) ]) ]) ]) @@ -293,12 +292,8 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ dnl # dnl # Supported xattr handler list() interfaces checked newest to oldest. dnl # -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ - dnl # 4.5 API change, - dnl # The xattr_handler->list() callback was changed to take only a - dnl # dentry and it only needs to return if it's accessible. - AC_MSG_CHECKING([whether xattr_handler->list() wants simple]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ + ZFS_LINUX_TEST_SRC([xattr_handler_list_simple], [ #include bool list(struct dentry *dentry) { return 0; } @@ -306,8 +301,52 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ xops __attribute__ ((unused)) = { .list = list, }; - ],[ - ],[ + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_list_xattr_handler], [ + #include + + size_t list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .list = list, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_list_dentry], [ + #include + + size_t list(struct dentry *dentry, + char *list, size_t list_size, + const char *name, size_t name_len, + int handler_flags) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .list = list, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([xattr_handler_list_inode], [ + #include + + size_t list(struct inode *ip, char *lst, + size_t list_size, const char *name, + size_t name_len) { return 0; } + static const struct xattr_handler + xops __attribute__ ((unused)) = { + .list = list, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ + dnl # 4.5 API change, + dnl # The xattr_handler->list() callback was changed to take only a + dnl # dentry and it only needs to return if it's accessible. 
+ AC_MSG_CHECKING([whether xattr_handler->list() wants simple]) + ZFS_LINUX_TEST_RESULT([xattr_handler_list_simple], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_SIMPLE, 1, [xattr_handler->list() wants simple]) @@ -321,18 +360,7 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->list() wants xattr_handler]) - ZFS_LINUX_TRY_COMPILE([ - #include - - size_t list(const struct xattr_handler *handler, - struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([xattr_handler_list_xattr_handler], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_HANDLER, 1, [xattr_handler->list() wants xattr_handler]) @@ -346,47 +374,24 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->list() wants dentry]) - ZFS_LINUX_TRY_COMPILE([ - #include - - size_t list(struct dentry *dentry, - char *list, size_t list_size, - const char *name, size_t name_len, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT([xattr_handler_list_dentry], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_DENTRY, 1, [xattr_handler->list() wants dentry]) ],[ dnl # - dnl # 2.6.32 API + dnl # Legacy 2.6.32 API dnl # AC_MSG_RESULT(no) AC_MSG_CHECKING( [whether xattr_handler->list() wants inode]) - ZFS_LINUX_TRY_COMPILE([ - #include - - size_t list(struct inode *ip, char *lst, - size_t list_size, const char *name, - size_t name_len) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[ - ],[ + ZFS_LINUX_TEST_RESULT( + [xattr_handler_list_inode], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_XATTR_LIST_INODE, 1, [xattr_handler->list() wants inode]) ],[ - AC_MSG_ERROR( - [no; please file a bug report]) + ZFS_LINUX_TEST_ERROR([xattr list()]) ]) ]) ]) @@ -398,15 +403,19 @@ dnl # 3.7 API change, dnl # The posix_acl_{from,to}_xattr functions gained a new dnl # parameter: user_ns dnl # -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ - AC_MSG_CHECKING([whether posix_acl_from_xattr() needs user_ns]) - ZFS_LINUX_TRY_COMPILE([ +AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS], [ + ZFS_LINUX_TEST_SRC([posix_acl_from_xattr_userns], [ #include #include #include ],[ posix_acl_from_xattr(&init_user_ns, NULL, 0); - ],[ + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ + AC_MSG_CHECKING([whether posix_acl_from_xattr() needs user_ns]) + ZFS_LINUX_TEST_RESULT([posix_acl_from_xattr_userns], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_POSIX_ACL_FROM_XATTR_USERNS, 1, [posix_acl_from_xattr() needs user_ns]) @@ -415,3 +424,50 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ ]) ]) +dnl # +dnl # 4.9 API change, +dnl # iops->{set,get,remove}xattr and generic_{set,get,remove}xattr are +dnl # removed. xattr operations will directly go through sb->s_xattr. 
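A sketch of what that 4.9 change means for a filesystem, where registering the handler table on the super block suffices (names illustrative, real handler list elided):

    #include <linux/fs.h>
    #include <linux/xattr.h>

    static const struct xattr_handler *zpl_xattr_handlers_sketch[] = {
            NULL,   /* real handlers would be listed here */
    };

    static void
    zpl_sb_xattr_init(struct super_block *sb)
    {
            sb->s_xattr = zpl_xattr_handlers_sketch;
    #ifdef HAVE_GENERIC_SETXATTR
            /* Pre-4.9 kernels additionally wire generic_setxattr() and
             * friends into the inode_operations tables (not shown). */
    #endif
    }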
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR], [ + ZFS_LINUX_TEST_SRC([have_generic_setxattr], [ + #include + #include + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .setxattr = generic_setxattr + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_SETXATTR], [ + AC_MSG_CHECKING([whether generic_setxattr() exists]) + ZFS_LINUX_TEST_RESULT([have_generic_setxattr], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_SETXATTR, 1, + [generic_setxattr() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR], [ + ZFS_AC_KERNEL_SRC_CONST_XATTR_HANDLER + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST + ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS + ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR +]) + +AC_DEFUN([ZFS_AC_KERNEL_XATTR], [ + ZFS_AC_KERNEL_CONST_XATTR_HANDLER + ZFS_AC_KERNEL_XATTR_HANDLER_NAME + ZFS_AC_KERNEL_XATTR_HANDLER_GET + ZFS_AC_KERNEL_XATTR_HANDLER_SET + ZFS_AC_KERNEL_XATTR_HANDLER_LIST + ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS + ZFS_AC_KERNEL_GENERIC_SETXATTR +]) diff --git a/config/kernel-zlib.m4 b/config/kernel-zlib.m4 index 3ca7cf682dad..d554d1168e76 100644 --- a/config/kernel-zlib.m4 +++ b/config/kernel-zlib.m4 @@ -1,62 +1,25 @@ -dnl # -dnl # zlib inflate compat, -dnl # Verify the kernel has CONFIG_ZLIB_INFLATE support enabled. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE], [ - AC_MSG_CHECKING([whether CONFIG_ZLIB_INFLATE is defined]) - ZFS_LINUX_TRY_COMPILE([ - #if !defined(CONFIG_ZLIB_INFLATE) && \ - !defined(CONFIG_ZLIB_INFLATE_MODULE) - #error CONFIG_ZLIB_INFLATE not defined - #endif - ],[ ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([ - *** This kernel does not include the required zlib inflate support. - *** Rebuild the kernel with CONFIG_ZLIB_INFLATE=y|m set.]) - ]) -]) - -dnl # -dnl # zlib deflate compat, -dnl # Verify the kernel has CONFIG_ZLIB_DEFLATE support enabled. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE], [ - AC_MSG_CHECKING([whether CONFIG_ZLIB_DEFLATE is defined]) - ZFS_LINUX_TRY_COMPILE([ - #if !defined(CONFIG_ZLIB_DEFLATE) && \ - !defined(CONFIG_ZLIB_DEFLATE_MODULE) - #error CONFIG_ZLIB_DEFLATE not defined - #endif - ],[ ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([ - *** This kernel does not include the required zlib deflate support. - *** Rebuild the kernel with CONFIG_ZLIB_DEFLATE=y|m set.]) - ]) -]) - dnl # dnl # 2.6.39 API compat, +dnl # dnl # The function zlib_deflate_workspacesize() now takes 2 arguments. dnl # This was done to avoid always having to allocate the maximum size dnl # workspace (268K). The caller can now specify the windowBits and dnl # memLevel compression parameters to get a smaller workspace.
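The resulting define is consumed with a guard such as the following sketch (the function name is illustrative):

    #include <linux/zlib.h>

    static int
    zfs_zlib_workspace_size(void)
    {
    #ifdef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
            /* 2.6.39+: size the workspace for the parameters in use */
            return (zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
    #else
            /* Older kernels always size for the worst case (~268K) */
            return (zlib_deflate_workspacesize());
    #endif
    }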
 dnl #
 dnl # 2.6.39 API compat,
+dnl
 dnl # The function zlib_deflate_workspacesize() now takes 2 arguments.
 dnl # This was done to avoid always having to allocate the maximum size
 dnl # workspace (268K). The caller can now specify the windowBits and
 dnl # memLevel compression parameters to get a smaller workspace.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE],
-	[AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args])
-	ZFS_LINUX_TRY_COMPILE([
+AC_DEFUN([ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [
+	ZFS_LINUX_TEST_SRC([2args_zlib_deflate_workspacesize], [
 		#include <linux/zlib.h>
 	],[
 		return zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
-	],[
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [
+	AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args])
+	ZFS_LINUX_TEST_RESULT([2args_zlib_deflate_workspacesize], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1,
-		    [zlib_deflate_workspacesize() wants 2 args])
+			[zlib_deflate_workspacesize() wants 2 args])
 	],[
 		AC_MSG_RESULT(no)
 	])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 54f39164bb5f..dce619729d4c 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -2,113 +2,216 @@ dnl #
 dnl # Default ZFS kernel configuration
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
+	dnl # Setup the kernel build environment.
 	ZFS_AC_KERNEL
 	ZFS_AC_QAT
-	ZFS_AC_KERNEL_ACCESS_OK_TYPE
-	ZFS_AC_TEST_MODULE
+
+	dnl # Sanity checks for module building and CONFIG_* defines
+	ZFS_AC_KERNEL_TEST_MODULE
+	ZFS_AC_KERNEL_CONFIG_DEFINED
+
+	dnl # Sequential ZFS_LINUX_TRY_COMPILE tests
+	ZFS_AC_KERNEL_FPU_HEADER
+	ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T
 	ZFS_AC_KERNEL_MISC_MINOR
+	ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
+
+	dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests
+	ZFS_AC_KERNEL_TEST_SRC
+	ZFS_AC_KERNEL_TEST_RESULT
+
+	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
+		KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ"
+	])
+
+	AC_SUBST(KERNEL_MAKE)
+])
+
+dnl #
+dnl # Generate and compile all of the kernel API test cases to determine
+dnl # which interfaces are available. By invoking the kernel build system
+dnl # only once, the compilation can be done in parallel, significantly
+dnl # speeding up the process.
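Because the whole kabi test suite is now built by a single kernel-build invocation, its parallelism becomes tunable; the TEST_JOBS variable registered in config/zfs-build.m4 further down caps the job count. For example:

```sh
# Cap the configure-time kernel interface checks at four parallel jobs;
# when TEST_JOBS is unset it defaults to $(nproc):
TEST_JOBS=4 ./configure
```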
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ + ZFS_AC_KERNEL_SRC_OBJTOOL + ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE + ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE + ZFS_AC_KERNEL_SRC_CTL_NAME + ZFS_AC_KERNEL_SRC_PDE_DATA + ZFS_AC_KERNEL_SRC_FALLOCATE + ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE + ZFS_AC_KERNEL_SRC_RWSEM + ZFS_AC_KERNEL_SRC_SCHED + ZFS_AC_KERNEL_SRC_USLEEP_RANGE + ZFS_AC_KERNEL_SRC_KMEM_CACHE + ZFS_AC_KERNEL_SRC_WAIT + ZFS_AC_KERNEL_SRC_INODE_TIMES + ZFS_AC_KERNEL_SRC_INODE_LOCK + ZFS_AC_KERNEL_SRC_GROUP_INFO_GID + ZFS_AC_KERNEL_SRC_RW + ZFS_AC_KERNEL_SRC_TIMER_SETUP + ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL + ZFS_AC_KERNEL_SRC_SUPER_USER_NS + ZFS_AC_KERNEL_SRC_SUBMIT_BIO + ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS + ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH + ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART + ZFS_AC_KERNEL_SRC_INVALIDATE_BDEV + ZFS_AC_KERNEL_SRC_LOOKUP_BDEV + ZFS_AC_KERNEL_SRC_BDEV_OPEN_EXCLUSIVE + ZFS_AC_KERNEL_SRC_BDEV_LOGICAL_BLOCK_SIZE + ZFS_AC_KERNEL_SRC_BDEV_PHYSICAL_BLOCK_SIZE + ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER + ZFS_AC_KERNEL_SRC_BIO_FAILFAST + ZFS_AC_KERNEL_SRC_BIO_SET_DEV + ZFS_AC_KERNEL_SRC_BIO_OPS + ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS + ZFS_AC_KERNEL_SRC_BIO_BI_STATUS + ZFS_AC_KERNEL_SRC_BIO_RW_BARRIER + ZFS_AC_KERNEL_SRC_BIO_RW_DISCARD + ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI + ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD + ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAGS + ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH + ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS + ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG + ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE + ZFS_AC_KERNEL_SRC_GET_DISK_RO + ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL + ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY + ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE + ZFS_AC_KERNEL_SRC_XATTR + ZFS_AC_KERNEL_SRC_ACL + ZFS_AC_KERNEL_SRC_INODE_GETATTR + ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS + ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION + ZFS_AC_KERNEL_SRC_SHOW_OPTIONS + ZFS_AC_KERNEL_SRC_FILE_INODE + ZFS_AC_KERNEL_SRC_FILE_DENTRY + ZFS_AC_KERNEL_SRC_FSYNC + ZFS_AC_KERNEL_SRC_AIO_FSYNC + ZFS_AC_KERNEL_SRC_EVICT_INODE + ZFS_AC_KERNEL_SRC_DIRTY_INODE + ZFS_AC_KERNEL_SRC_SHRINKER + ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T + ZFS_AC_KERNEL_SRC_LOOKUP_NAMEIDATA + ZFS_AC_KERNEL_SRC_CREATE_NAMEIDATA + ZFS_AC_KERNEL_SRC_GET_LINK + ZFS_AC_KERNEL_SRC_PUT_LINK + ZFS_AC_KERNEL_SRC_TMPFILE + ZFS_AC_KERNEL_SRC_TRUNCATE_RANGE + ZFS_AC_KERNEL_SRC_AUTOMOUNT + ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE + ZFS_AC_KERNEL_SRC_COMMIT_METADATA + ZFS_AC_KERNEL_SRC_CLEAR_INODE + ZFS_AC_KERNEL_SRC_SETATTR_PREPARE + ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED + ZFS_AC_KERNEL_SRC_DENTRY + ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE + ZFS_AC_KERNEL_SRC_SECURITY_INODE + ZFS_AC_KERNEL_SRC_FST_MOUNT + ZFS_AC_KERNEL_SRC_BDI + ZFS_AC_KERNEL_SRC_SET_NLINK + ZFS_AC_KERNEL_SRC_SGET + ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE + ZFS_AC_KERNEL_SRC_VFS_GETATTR + ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS + ZFS_AC_KERNEL_SRC_VFS_ITERATE + ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO + ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE + ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS + ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS + ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE + ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN + ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT + ZFS_AC_KERNEL_SRC_FPU + ZFS_AC_KERNEL_SRC_FMODE_T + ZFS_AC_KERNEL_SRC_KUIDGID_T + ZFS_AC_KERNEL_SRC_KUID_HELPERS + ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST + ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS + ZFS_AC_KERNEL_SRC_CURRENT_TIME + ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES + ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL + 
ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64 + ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC + ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES + ZFS_AC_KERNEL_SRC_KSTRTOUL + + AC_MSG_CHECKING([for available kernel interfaces]) + ZFS_LINUX_TEST_COMPILE_ALL([kabi]) + AC_MSG_RESULT([done]) +]) + +dnl # +dnl # Check results of kernel interface tests. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ + ZFS_AC_KERNEL_ACCESS_OK_TYPE + ZFS_AC_KERNEL_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_OBJTOOL - ZFS_AC_KERNEL_CONFIG ZFS_AC_KERNEL_CTL_NAME ZFS_AC_KERNEL_PDE_DATA - ZFS_AC_KERNEL_SET_FS_PWD_WITH_CONST - ZFS_AC_KERNEL_2ARGS_VFS_FSYNC - ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK - ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE - ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW - ZFS_AC_KERNEL_RWSEM_ACTIVITY - ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT - ZFS_AC_KERNEL_SCHED_RT_HEADER - ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER - ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT - ZFS_AC_KERNEL_4ARGS_VFS_GETATTR - ZFS_AC_KERNEL_3ARGS_VFS_GETATTR - ZFS_AC_KERNEL_2ARGS_VFS_GETATTR + ZFS_AC_KERNEL_RWSEM + ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE - ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS - ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY - ZFS_AC_KERNEL_WAIT_ON_BIT - ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T - ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY + ZFS_AC_KERNEL_KMEM_CACHE + ZFS_AC_KERNEL_WAIT ZFS_AC_KERNEL_INODE_TIMES ZFS_AC_KERNEL_INODE_LOCK ZFS_AC_KERNEL_GROUP_INFO_GID - ZFS_AC_KERNEL_WRITE - ZFS_AC_KERNEL_READ + ZFS_AC_KERNEL_RW ZFS_AC_KERNEL_TIMER_SETUP - ZFS_AC_KERNEL_DECLARE_EVENT_CLASS ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS ZFS_AC_KERNEL_SUBMIT_BIO - ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS - ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID - ZFS_AC_KERNEL_TYPE_FMODE_T + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_BLKDEV_REREAD_PART - ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE + ZFS_AC_KERNEL_INVALIDATE_BDEV ZFS_AC_KERNEL_LOOKUP_BDEV - ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS + ZFS_AC_KERNEL_BDEV_OPEN_EXCLUSIVE ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE ZFS_AC_KERNEL_BIO_BVEC_ITER - ZFS_AC_KERNEL_BIO_FAILFAST_DTD + ZFS_AC_KERNEL_BIO_FAILFAST ZFS_AC_KERNEL_BIO_SET_DEV - ZFS_AC_KERNEL_REQ_FAILFAST_MASK - ZFS_AC_KERNEL_REQ_OP_DISCARD - ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE - ZFS_AC_KERNEL_REQ_OP_FLUSH - ZFS_AC_KERNEL_BIO_BI_OPF + ZFS_AC_KERNEL_BIO_OPS ZFS_AC_KERNEL_BIO_END_IO_T_ARGS ZFS_AC_KERNEL_BIO_BI_STATUS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_BDI - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET + ZFS_AC_KERNEL_BLK_QUEUE_DISCARD + ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE + ZFS_AC_KERNEL_BLK_QUEUE_FLAGS ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG - ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG + ZFS_AC_KERNEL_BLK_QUEUE_PLUG ZFS_AC_KERNEL_GET_DISK_AND_MODULE ZFS_AC_KERNEL_GET_DISK_RO - ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_DISCARD_GRANULARITY - ZFS_AC_KERNEL_CONST_XATTR_HANDLER - ZFS_AC_KERNEL_XATTR_HANDLER_NAME - ZFS_AC_KERNEL_XATTR_HANDLER_GET - ZFS_AC_KERNEL_XATTR_HANDLER_SET - ZFS_AC_KERNEL_XATTR_HANDLER_LIST ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE - ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS - ZFS_AC_KERNEL_POSIX_ACL_RELEASE - ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE - ZFS_AC_KERNEL_POSIX_ACL_CHMOD - ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T - 
ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS - ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION - ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA - ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL - ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS - ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL - ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL - ZFS_AC_KERNEL_INODE_OPERATIONS_GETATTR + ZFS_AC_KERNEL_XATTR + ZFS_AC_KERNEL_ACL + ZFS_AC_KERNEL_INODE_GETATTR ZFS_AC_KERNEL_INODE_SET_FLAGS ZFS_AC_KERNEL_INODE_SET_IVERSION - ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE ZFS_AC_KERNEL_SHOW_OPTIONS ZFS_AC_KERNEL_FILE_INODE ZFS_AC_KERNEL_FILE_DENTRY ZFS_AC_KERNEL_FSYNC - ZFS_AC_KERNEL_EVICT_INODE - ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS - ZFS_AC_KERNEL_NR_CACHED_OBJECTS - ZFS_AC_KERNEL_FREE_CACHED_OBJECTS - ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_AIO_FSYNC + ZFS_AC_KERNEL_EVICT_INODE + ZFS_AC_KERNEL_DIRTY_INODE + ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR_UMODE_T ZFS_AC_KERNEL_LOOKUP_NAMEIDATA ZFS_AC_KERNEL_CREATE_NAMEIDATA @@ -122,57 +225,37 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CLEAR_INODE ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED - ZFS_AC_KERNEL_D_MAKE_ROOT - ZFS_AC_KERNEL_D_OBTAIN_ALIAS - ZFS_AC_KERNEL_D_PRUNE_ALIASES - ZFS_AC_KERNEL_D_SET_D_OP - ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA - ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS + ZFS_AC_KERNEL_DENTRY ZFS_AC_KERNEL_TRUNCATE_SETSIZE - ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY - ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY + ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT - ZFS_AC_KERNEL_SHRINK - ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID - ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT - ZFS_AC_KERNEL_SHRINKER_CALLBACK - ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD - ZFS_AC_KERNEL_S_D_OP ZFS_AC_KERNEL_BDI ZFS_AC_KERNEL_SET_NLINK - ZFS_AC_KERNEL_ELEVATOR_CHANGE - ZFS_AC_KERNEL_5ARG_SGET + ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_LSEEK_EXECUTE + ZFS_AC_KERNEL_VFS_GETATTR + ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_ITERATE - ZFS_AC_KERNEL_VFS_RW_ITERATE ZFS_AC_KERNEL_VFS_DIRECT_IO - ZFS_AC_KERNEL_GENERIC_WRITE_CHECKS + ZFS_AC_KERNEL_VFS_RW_ITERATE + ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN - ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG - ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG + ZFS_AC_KERNEL_GENERIC_IO_ACCT ZFS_AC_KERNEL_FPU + ZFS_AC_KERNEL_FMODE_T + ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST ZFS_AC_KERNEL_RENAME_WANTS_FLAGS - ZFS_AC_KERNEL_HAVE_GENERIC_SETXATTR ZFS_AC_KERNEL_CURRENT_TIME - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE - ZFS_AC_KERNEL_ACL_HAS_REFCOUNT ZFS_AC_KERNEL_USERNS_CAPABILITIES ZFS_AC_KERNEL_IN_COMPAT_SYSCALL ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64 ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES - ZFS_AC_KERNEL_BLK_QUEUE_DISCARD - ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE - - AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ - KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" - ]) - - AC_SUBST(KERNEL_MAKE) + ZFS_AC_KERNEL_KSTRTOUL ]) dnl # @@ -191,9 +274,10 @@ AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [ AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution - *** is installed. If you are building with a custom kernel, make sure the - *** kernel is configured, built, and the '--with-linux=PATH' configure - *** option refers to the location of the kernel source.]) + *** is installed. 
If you are building with a custom kernel, make sure + *** the kernel is configured, built, and the '--with-linux=PATH' + *** configure option refers to the location of the kernel source. + ]) ]) ], [ LINUX_SYMBOLS=NONE @@ -286,12 +370,16 @@ AC_DEFUN([ZFS_AC_KERNEL], [ AS_IF([test -z "$kernsrcver"], [ AC_MSG_RESULT([Not found]) - AC_MSG_ERROR([*** Cannot determine kernel version.]) + AC_MSG_ERROR([ + *** Cannot determine kernel version. + ]) ]) ], [ AC_MSG_RESULT([Not found]) if test "x$enable_linux_builtin" != xyes; then - AC_MSG_ERROR([*** Cannot find UTS_RELEASE definition.]) + AC_MSG_ERROR([ + *** Cannot find UTS_RELEASE definition. + ]) else AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. @@ -313,24 +401,27 @@ AC_DEFUN([ZFS_AC_KERNEL], [ ]) dnl # -dnl # Detect the QAT module to be built against -dnl # QAT provides hardware acceleration for data compression: -dnl # https://01.org/intel-quickassist-technology -dnl # * Download and install QAT driver from the above link -dnl # * Start QAT driver in your system: -dnl # service qat_service start -dnl # * Enable QAT in ZFS, e.g.: -dnl # ./configure --with-qat=/QAT1.6 -dnl # make -dnl # * Set GZIP compression in ZFS dataset: -dnl # zfs set compression = gzip -dnl # Then the data written to this ZFS pool is compressed -dnl # by QAT accelerator automatically, and de-compressed by -dnl # QAT when read from the pool. -dnl # * Get QAT hardware statistics by: -dnl # cat /proc/icp_dh895xcc_dev/qat -dnl # * To disable QAT: -dnl # insmod zfs.ko zfs_qat_disable=1 +dnl # Detect the QAT module to be built against, QAT provides hardware +dnl # acceleration for data compression: +dnl # +dnl # https://01.org/intel-quickassist-technology +dnl # +dnl # 1) Download and install QAT driver from the above link +dnl # 2) Start QAT driver in your system: +dnl # service qat_service start +dnl # 3) Enable QAT in ZFS, e.g.: +dnl # ./configure --with-qat=/QAT1.6 +dnl # make +dnl # 4) Set GZIP compression in ZFS dataset: +dnl # zfs set compression = gzip +dnl # +dnl # Then the data written to this ZFS pool is compressed by QAT accelerator +dnl # automatically, and de-compressed by QAT when read from the pool. +dnl # +dnl # 1) Get QAT hardware statistics with: +dnl # cat /proc/icp_dh895xcc_dev/qat +dnl # 2) To disable QAT: +dnl # insmod zfs.ko zfs_qat_disable=1 dnl # AC_DEFUN([ZFS_AC_QAT], [ AC_ARG_WITH([qat], @@ -351,11 +442,11 @@ AC_DEFUN([ZFS_AC_QAT], [ QAT_SRC="${qatsrc}/quickassist" AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [ AC_MSG_ERROR([ - *** Please make sure the qat driver package is installed - *** and specify the location of the qat source with the - *** '--with-qat=PATH' option then try again. Failed to - *** find cpa.h in: - ${QAT_SRC}/include]) + *** Please make sure the qat driver package is installed + *** and specify the location of the qat source with the + *** '--with-qat=PATH' option then try again. Failed to + *** find cpa.h in: + ${QAT_SRC}/include]) ]) ]) @@ -369,9 +460,9 @@ AC_DEFUN([ZFS_AC_QAT], [ QAT_OBJ=${qatbuild} AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko" && ! test -e "$QAT_OBJ/qat_api.ko"], [ AC_MSG_ERROR([ - *** Please make sure the qat driver is installed then try again. - *** Failed to find icp_qa_al.ko or qat_api.ko in: - $QAT_OBJ]) + *** Please make sure the qat driver is installed then try again. 
+ *** Failed to find icp_qa_al.ko or qat_api.ko in: + $QAT_OBJ]) ]) AC_SUBST(QAT_SRC) @@ -392,10 +483,10 @@ AC_DEFUN([ZFS_AC_QAT], [ AC_MSG_RESULT([$QAT_SYMBOLS]) AC_SUBST(QAT_SYMBOLS) ],[ - AC_MSG_ERROR([ - *** Please make sure the qat driver is installed then try again. - *** Failed to find Module.symvers in: - $QAT_SYMBOLS]) + AC_MSG_ERROR([ + *** Please make sure the qat driver is installed then try again. + *** Failed to find Module.symvers in: + $QAT_SYMBOLS ]) ]) ]) @@ -404,14 +495,16 @@ AC_DEFUN([ZFS_AC_QAT], [ dnl # dnl # Basic toolchain sanity check. dnl # -AC_DEFUN([ZFS_AC_TEST_MODULE], [ +AC_DEFUN([ZFS_AC_KERNEL_TEST_MODULE], [ AC_MSG_CHECKING([whether modules can be built]) - ZFS_LINUX_TRY_COMPILE([],[],[ + ZFS_LINUX_TRY_COMPILE([], [], [ AC_MSG_RESULT([yes]) ],[ AC_MSG_RESULT([no]) if test "x$enable_linux_builtin" != xyes; then - AC_MSG_ERROR([*** Unable to build an empty module.]) + AC_MSG_ERROR([ + *** Unable to build an empty module. + ]) else AC_MSG_ERROR([ *** Unable to build an empty module. @@ -421,206 +514,313 @@ AC_DEFUN([ZFS_AC_TEST_MODULE], [ ]) dnl # -dnl # Certain kernel build options are not supported. These must be -dnl # detected at configure time and cause a build failure. Otherwise -dnl # modules may be successfully built that behave incorrectly. +dnl # ZFS_LINUX_CONFTEST_H dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [ - AS_IF([test "x$cross_compiling" != xyes], [ - AC_RUN_IFELSE([ - AC_LANG_PROGRAM([ - #include "$LINUX/include/linux/license.h" - ], [ - return !license_is_gpl_compatible("$ZFS_META_LICENSE"); - ]) - ], [ - AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1], - [Define to 1 if GPL-only symbols can be used]) - ], [ - ]) - ]) +AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ +test -d build/$2 || mkdir -p build/$2 +cat - <<_ACEOF >build/$2/$2.h +$1 +_ACEOF +]) - ZFS_AC_KERNEL_CONFIG_THREAD_SIZE - ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC - ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS - ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE - ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE +dnl # +dnl # ZFS_LINUX_CONFTEST_C +dnl # +AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ +test -d build/$2 || mkdir -p build/$2 +cat confdefs.h - <<_ACEOF >build/$2/$2.c +$1 +_ACEOF ]) dnl # -dnl # Check configured THREAD_SIZE +dnl # ZFS_LINUX_CONFTEST_MAKEFILE dnl # -dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64 -dnl # the default thread stack size was increased to 16K from 8K. Therefore, -dnl # on newer kernels and some architectures stack usage optimizations can be -dnl # conditionally applied to improve performance without negatively impacting -dnl # stability. +dnl # $1 - test case name +dnl # $2 - add to top-level Makefile +dnl # $3 - additional build flags dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [ - AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - #if (THREAD_SIZE < 16384) - #error "THREAD_SIZE is less than 16K" - #endif - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks]) - ],[ - AC_MSG_RESULT([no]) - ]) +AC_DEFUN([ZFS_LINUX_CONFTEST_MAKEFILE], [ + test -d build || mkdir -p build + test -d build/$1 || mkdir -p build/$1 + + file=build/$1/Makefile + + dnl # Example command line to manually build source. + cat - <<_ACEOF >$file +# Example command line to manually build source +# make modules -C $LINUX_OBJ $ARCH_UM M=$PWD/build/$1 + +ccflags-y := -Werror $FRAME_LARGER_THAN +_ACEOF + + dnl # Additional custom CFLAGS as requested. 
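As the embedded "Example command line" comment says, each generated test directory is an ordinary external-module build, so a single case can be rebuilt by hand when debugging a configure failure. A sketch, where the test name and the kernel object directory are placeholders:

```sh
# Rebuild one generated kabi test outside of configure; LINUX_OBJ and
# the test name are example values, adjust to the kernel being targeted.
LINUX_OBJ="/lib/modules/$(uname -r)/build"
make modules -C "$LINUX_OBJ" M="$PWD/build/xattr_handler_list_simple"
```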
+ m4_ifval($3, [echo "ccflags-y += $3" >>$file], []) + + dnl # Test case source + echo "obj-m := $1.o" >>$file + + AS_IF([test "x$2" = "xyes"], [echo "obj-m += $1/" >>build/Makefile], []) ]) dnl # -dnl # Check CONFIG_DEBUG_LOCK_ALLOC +dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY]) +dnl # +m4_define([ZFS_LINUX_TEST_PROGRAM], [ +$1 +int +main (void) +{ +$2 + ; + return 0; +} +]) + dnl # -dnl # This is typically only set for debug kernels because it comes with -dnl # a performance penalty. However, when it is set it maps the non-GPL -dnl # symbol mutex_lock() to the GPL-only mutex_lock_nested() symbol. -dnl # This will cause a failure at link time which we'd rather know about -dnl # at compile time. +dnl # ZFS_LINUX_TEST_REMOVE dnl # -dnl # Since we plan to pursue making mutex_lock_nested() a non-GPL symbol -dnl # with the upstream community we add a check to detect this case. +dnl # Removes the specified test source and results. dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [ - - ZFS_LINUX_CONFIG([DEBUG_LOCK_ALLOC], [ - AC_MSG_CHECKING([whether mutex_lock() is GPL-only]) - tmp_flags="$EXTRA_KCFLAGS" - ZFS_LINUX_TRY_COMPILE([ - #include - #include +AC_DEFUN([ZFS_LINUX_TEST_REMOVE], [ + test -d build/$1 && rm -Rf build/$1 + test -f build/Makefile && sed '/$1/d' build/Makefile +]) - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct mutex lock; +dnl # +dnl # ZFS_LINUX_COMPILE +dnl # +dnl # $1 - build dir +dnl # $2 - test command +dnl # $3 - pass command +dnl # $4 - fail command +dnl # $5 - set KBUILD_MODPOST_NOFINAL='yes' +dnl # $6 - set KBUILD_MODPOST_WARN='yes' +dnl # +dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST} +dnl # +AC_DEFUN([ZFS_LINUX_COMPILE], [ + AC_TRY_COMMAND([ + KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6" + make modules -k -j$TEST_JOBS -C $LINUX_OBJ $ARCH_UM + M=$PWD/$1 &>$1/build.log]) + AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4]) +]) - mutex_init(&lock); - mutex_lock(&lock); - mutex_unlock(&lock); - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_MSG_ERROR([ - *** Kernel built with CONFIG_DEBUG_LOCK_ALLOC which is incompatible - *** with the CDDL license and will prevent the module linking stage - *** from succeeding. You must rebuild your kernel without this - *** option enabled.]) - ]) - EXTRA_KCFLAGS="$tmp_flags" - ], []) +dnl # +dnl # ZFS_LINUX_TEST_COMPILE +dnl # +dnl # Perform a full compile excluding the final modpost phase. +dnl # +AC_DEFUN([ZFS_LINUX_TEST_COMPILE], [ + ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ + mv $2/Makefile $2/Makefile.compile.$1 + mv $2/build.log $2/build.log.$1 + ],[ + AC_MSG_ERROR([ + *** Unable to compile test source to determine kernel interfaces.]) + ], [yes], []) ]) dnl # -dnl # Check CONFIG_TRIM_UNUSED_KSYMS +dnl # ZFS_LINUX_TEST_MODPOST dnl # -dnl # Verify the kernel has CONFIG_TRIM_UNUSED_KSYMS disabled. +dnl # Perform a full compile including the modpost phase. This may +dnl # be an incremental build if the objects have already been built. 
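The KBUILD_MODPOST_NOFINAL and KBUILD_MODPOST_WARN switches that ZFS_LINUX_COMPILE toggles are plain kbuild environment variables, so the two build passes can be reproduced manually; a sketch, assuming LINUX_OBJ is set as in the earlier example:

```sh
# Pass 1: compile every test object, keep going past failures (-k),
# and skip the final modpost link entirely.
KBUILD_MODPOST_NOFINAL=yes make modules -k -j"$(nproc)" \
    -C "$LINUX_OBJ" M="$PWD/build"

# Pass 2: rerun with modpost enabled but only warning on undefined
# symbols, so one incompatible test cannot abort the whole phase.
KBUILD_MODPOST_WARN=yes make modules -k -j"$(nproc)" \
    -C "$LINUX_OBJ" M="$PWD/build"
```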
dnl # -AC_DEFUN([ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS], [ - AC_MSG_CHECKING([whether CONFIG_TRIM_UNUSED_KSYM is disabled]) - ZFS_LINUX_TRY_COMPILE([ - #if defined(CONFIG_TRIM_UNUSED_KSYMS) - #error CONFIG_TRIM_UNUSED_KSYMS not defined - #endif - ],[ ],[ - AC_MSG_RESULT([yes]) +AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [ + ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ + mv $2/Makefile $2/Makefile.modpost.$1 + cat $2/build.log >>build/build.log.$1 ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([ - *** This kernel has unused symbols trimming enabled, please disable. - *** Rebuild the kernel with CONFIG_TRIM_UNUSED_KSYMS=n set.]) + AC_MSG_ERROR([ + *** Unable to modpost test source to determine kernel interfaces.]) + ], [], [yes]) +]) + +dnl # +dnl # Perform the compilation of the test cases in two phases. +dnl # +dnl # Phase 1) attempt to build the object files for all of the tests +dnl # defined by the ZFS_LINUX_TEST_SRC macro. But do not +dnl # perform the final modpost stage. +dnl # +dnl # Phase 2) disable all tests which failed the initial compilation, +dnl # then invoke the final modpost step for the remaining tests. +dnl # +dnl # This allows us efficiently build the test cases in parallel while +dnl # remaining resilient to build failures which are expected when +dnl # detecting the available kernel interfaces. +dnl # +dnl # The maximum allowed parallelism can be controlled by setting the +dnl # TEST_JOBS environment variable. Otherwise, it default to $(nproc). +dnl # +AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [ + dnl # Phase 1 - Compilation only, final linking is skipped. + ZFS_LINUX_TEST_COMPILE([$1], [build]) + + dnl # + dnl # Phase 2 - When building external modules disable test cases + dnl # which failed to compile and invoke modpost to verify the + dnl # final linking. + dnl # + dnl # Test names suffixed with '_license' call modpost independently + dnl # to ensure that a single incompatibility does not result in the + dnl # modpost phase exiting early. This check is not performed on + dnl # every symbol since the majority are compatible and doing so + dnl # would significantly slow down this phase. + dnl # + dnl # When configuring for builtin (--enable-linux-builtin) + dnl # fake the linking step artificially create the expected .ko + dnl # files for tests which did compile. This is required for + dnl # kernels which do not have loadable module support or have + dnl # not yet been built. + dnl # + AS_IF([test "x$enable_linux_builtin" = "xno"], [ + for dir in $(awk '/^obj-m/ { print [$]3 }' \ + build/Makefile.compile.$1); do + name=${dir%/} + AS_IF([test -f build/$name/$name.o], [ + AS_IF([test "${name##*_}" = "license"], [ + ZFS_LINUX_TEST_MODPOST([$1], + [build/$name]) + echo "obj-n += $dir" >>build/Makefile + ], [ + echo "obj-m += $dir" >>build/Makefile + ]) + ], [ + echo "obj-n += $dir" >>build/Makefile + ]) + done + + ZFS_LINUX_TEST_MODPOST([$1], [build]) + ], [ + for dir in $(awk '/^obj-m/ { print [$]3 }' \ + build/Makefile.compile.$1); do + name=${dir%/} + AS_IF([test -f build/$name/$name.o], [ + touch build/$name/$name.ko + ]) + done ]) ]) dnl # -dnl # ZFS_LINUX_CONFTEST_H +dnl # ZFS_LINUX_TEST_SRC dnl # -AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ -cat - <<_ACEOF >conftest.h -$1 -_ACEOF +dnl # $1 - name +dnl # $2 - global +dnl # $3 - source +dnl # $4 - extra cflags +dnl # $5 - check license-compatibility +dnl # +dnl # N.B because all of the test cases are compiled in parallel they +dnl # must never depend on the results of previous tests. Each test +dnl # needs to be entirely independent. 
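Because each test is independent, the result check is nothing more than a file-existence test on the .ko left behind after modpost; the manual equivalent for one test (the name is an example):

```sh
# ZFS_LINUX_TEST_RESULT keys off the presence of build/<name>/<name>.ko:
name="posix_acl_from_xattr_userns"
if test -f "build/$name/$name.ko"; then
	echo "interface available"
else
	echo "interface missing"
fi
```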
+dnl # +AC_DEFUN([ZFS_LINUX_TEST_SRC], [ + ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]])], [$1]) + ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4]) + + AS_IF([ test -n "$5" ], [ + ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[ + #include + MODULE_LICENSE("$5"); + $2]], [[$3]])], [$1_license]) + ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4]) + ]) ]) dnl # -dnl # ZFS_LINUX_CONFTEST_C +dnl # ZFS_LINUX_TEST_RESULT dnl # -AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ -cat confdefs.h - <<_ACEOF >conftest.c -$1 -_ACEOF +dnl # $1 - name of a test source (ZFS_LINUX_TEST_SRC) +dnl # $2 - run on success (valid .ko generated) +dnl # $3 - run on failure (unable to compile) +dnl # +AC_DEFUN([ZFS_LINUX_TEST_RESULT], [ + AS_IF([test -d build/$1], [ + AS_IF([test -f build/$1/$1.ko], [$2], [$3]) + ], [ + AC_MSG_ERROR([ + *** No matching source for the "$1" test, check that + *** both the test source and result macros refer to the same name. + ]) + ]) ]) dnl # -dnl # ZFS_LANG_PROGRAM(C)([PROLOGUE], [BODY]) +dnl # ZFS_LINUX_TEST_ERROR dnl # -m4_define([ZFS_LANG_PROGRAM], [ -$1 -int -main (void) -{ -dnl Do *not* indent the following line: there may be CPP directives. -dnl Don't move the `;' right after for the same reason. -$2 - ; - return 0; -} +dnl # Generic error message which can be used when none of the expected +dnl # kernel interfaces were detected. +dnl # +AC_DEFUN([ZFS_LINUX_TEST_ERROR], [ + AC_MSG_ERROR([ + *** None of the expected "$1" interfaces were detected. + *** This may be because your kernel version is newer than what is + *** supported, or you are using a patched custom kernel with + *** incompatible modifications. + *** + *** ZFS Version: $ZFS_META_ALIAS + *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX + ]) ]) dnl # -dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE +dnl # ZFS_LINUX_TEST_RESULT_SYMBOL dnl # -AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ - m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1])]) - m4_ifvaln([$6], [ZFS_LINUX_CONFTEST_H([$6])], [ZFS_LINUX_CONFTEST_H([])]) - rm -Rf build && mkdir -p build && touch build/conftest.mod.c - echo "obj-m := conftest.o" >build/Makefile - modpost_flag='' - test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage - AS_IF( - [AC_TRY_COMMAND(cp conftest.c conftest.h build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $FRAME_LARGER_THAN $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])], - [$4], - [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])] - ) - rm -Rf build +dnl # Like ZFS_LINUX_TEST_RESULT except ZFS_CHECK_SYMBOL_EXPORT is called to +dnl # verify symbol exports, unless --enable-linux-builtin was provided to +dnl # configure. +dnl # +AC_DEFUN([ZFS_LINUX_TEST_RESULT_SYMBOL], [ + AS_IF([ ! 
test -f build/$1/$1.ko], [ + $5 + ], [ + AS_IF([test "x$enable_linux_builtin" != "xyes"], [ + ZFS_CHECK_SYMBOL_EXPORT([$2], [$3], [$4], [$5]) + ], [ + $4 + ]) + ]) ]) dnl # -dnl # ZFS_LINUX_TRY_COMPILE like AC_TRY_COMPILE +dnl # ZFS_LINUX_COMPILE_IFELSE dnl # -AC_DEFUN([ZFS_LINUX_TRY_COMPILE], - [ZFS_LINUX_COMPILE_IFELSE( - [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])], - [modules], - [test -s build/conftest.o], - [$3], [$4]) +AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ + ZFS_LINUX_TEST_REMOVE([conftest]) + + m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1], [conftest])]) + m4_ifvaln([$5], [ZFS_LINUX_CONFTEST_H([$5], [conftest])], + [ZFS_LINUX_CONFTEST_H([], [conftest])]) + + ZFS_LINUX_CONFTEST_MAKEFILE([conftest], [no], + [m4_ifvaln([$5], [-I$PWD/build/conftest], [])]) + ZFS_LINUX_COMPILE([build/conftest], [$2], [$3], [$4], [], []) ]) dnl # -dnl # ZFS_LINUX_CONFIG +dnl # ZFS_LINUX_TRY_COMPILE dnl # -AC_DEFUN([ZFS_LINUX_CONFIG], - [AC_MSG_CHECKING([whether kernel was built with CONFIG_$1]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - #ifndef CONFIG_$1 - #error CONFIG_$1 not #defined - #endif - ],[ - AC_MSG_RESULT([yes]) - $2 - ],[ - AC_MSG_RESULT([no]) - $3 - ]) +dnl # $1 - global +dnl # $2 - source +dnl # $3 - run on success (valid .ko generated) +dnl # $4 - run on failure (unable to compile) +dnl # +AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ + ZFS_LINUX_COMPILE_IFELSE( + [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], + [test -f build/conftest/conftest.ko], + [$3], [$4]) ]) dnl # dnl # ZFS_CHECK_SYMBOL_EXPORT -dnl # check symbol exported or not +dnl # +dnl # Check if a symbol is exported on not by consulting the symbols +dnl # file, or optionally the source code. dnl # AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ @@ -649,8 +849,10 @@ AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ dnl # dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL -dnl # like ZFS_LINUX_TRY_COMPILE, except ZFS_CHECK_SYMBOL_EXPORT -dnl # is called if not compiling for builtin +dnl # +dnl # Like ZFS_LINUX_TRY_COMPILER except ZFS_CHECK_SYMBOL_EXPORT is called +dnl # to verify symbol exports, unless --enable-linux-builtin was provided +dnl # to configure. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [ ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) @@ -673,10 +875,9 @@ dnl # ZFS_LINUX_TRY_COMPILE_HEADER dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are dnl # provided via the fifth parameter dnl # -AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], - [ZFS_LINUX_COMPILE_IFELSE( - [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])], - [modules], - [test -s build/conftest.o], - [$3], [$4], [$5]) +AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ + ZFS_LINUX_COMPILE_IFELSE( + [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])], + [test -f build/conftest/conftest.ko], + [$3], [$4], [$5]) ]) diff --git a/config/lib-link.m4 b/config/lib-link.m4 index 0ff10731facd..01766c315c97 100644 --- a/config/lib-link.m4 +++ b/config/lib-link.m4 @@ -216,7 +216,7 @@ AC_DEFUN([AC_LIB_LINKFLAGS_BODY], fi ]) dnl Search the library and its dependencies in $additional_libdir and - dnl $LDFLAGS. Using breadth-first-seach. + dnl $LDFLAGS. Using breadth-first-search. LIB[]NAME= LTLIB[]NAME= INC[]NAME= diff --git a/config/pkg.m4 b/config/pkg.m4 index 13a889017866..f9075e56c87a 100644 --- a/config/pkg.m4 +++ b/config/pkg.m4 @@ -86,7 +86,7 @@ dnl Check to see whether a particular set of modules exists. Similar to dnl PKG_CHECK_MODULES(), but does not set variables or print errors. 
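Looping back to ZFS_CHECK_SYMBOL_EXPORT above: its first step is a whitespace-delimited grep of the kernel's exported-symbol table, which is easy to replicate when a detection fails. A rough equivalent (the symbol name is an example, and Module.symvers is the usual value of LINUX_SYMBOLS):

```sh
# Approximate the macro's grep of the kernel symbol file with a
# word-boundary match:
grep -qw 'lookup_bdev' "$LINUX_OBJ/Module.symvers" &&
    echo "exported" || echo "not listed in Module.symvers"
```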
dnl dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -dnl only at the first occurence in configure.ac, so if the first place +dnl only at the first occurrence in configure.ac, so if the first place dnl it's called might be skipped (such as if it is within an "if", you dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], diff --git a/config/user.m4 b/config/user.m4 index 1ee9dbe263bc..3d97e9a418c3 100644 --- a/config/user.m4 +++ b/config/user.m4 @@ -27,7 +27,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [ dnl # dnl # Setup the environment for the ZFS Test Suite. Currently only -dnl # Linux sytle systems are supported but this infrastructure can +dnl # Linux style systems are supported but this infrastructure can dnl # be extended to support other platforms if needed. dnl # AC_DEFUN([ZFS_AC_TEST_FRAMEWORK], [ diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index 8e221f2d7d40..92aa6030dd16 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -166,6 +166,17 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ]) AC_DEFUN([ZFS_AC_CONFIG], [ + + dnl # Remove the previous build test directory. + rm -Rf build + + AC_ARG_VAR([TEST_JOBS], + [simultaneous jobs during configure (defaults to $(nproc))]) + if test "x$ac_cv_env_TEST_JOBS_set" != "xset"; then + TEST_JOBS=$(nproc) + fi + AC_SUBST(TEST_JOBS) + ZFS_CONFIG=all AC_ARG_WITH([config], AS_HELP_STRING([--with-config=CONFIG], @@ -461,7 +472,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [ AC_MSG_RESULT([$DEFAULT_INIT_SCRIPT]) AC_SUBST(DEFAULT_INIT_SCRIPT) - AC_MSG_CHECKING([default init config direectory]) + AC_MSG_CHECKING([default init config directory]) case "$VENDOR" in alpine) DEFAULT_INITCONF_DIR=/etc/conf.d ;; gentoo) DEFAULT_INITCONF_DIR=/etc/conf.d ;; diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4 index aa0fc1420933..b3c1befaac5d 100644 --- a/config/zfs-meta.m4 +++ b/config/zfs-meta.m4 @@ -138,6 +138,24 @@ AC_DEFUN([ZFS_AC_META], [ AC_SUBST([ZFS_META_AUTHOR]) fi + ZFS_META_KVER_MIN=_ZFS_AC_META_GETVAL([Linux-Minimum]); + if test -n "$ZFS_META_KVER_MIN"; then + AC_DEFINE_UNQUOTED([ZFS_META_KVER_MIN], + ["$ZFS_META_KVER_MIN"], + [Define the minimum compatible kernel version.] + ) + AC_SUBST([ZFS_META_KVER_MIN]) + fi + + ZFS_META_KVER_MAX=_ZFS_AC_META_GETVAL([Linux-Maximum]); + if test -n "$ZFS_META_KVER_MAX"; then + AC_DEFINE_UNQUOTED([ZFS_META_KVER_MAX], + ["$ZFS_META_KVER_MAX"], + [Define the maximum compatible kernel version.] 
+ ) + AC_SUBST([ZFS_META_KVER_MAX]) + fi + m4_pattern_allow([^LT_(CURRENT|REVISION|AGE)$]) ZFS_META_LT_CURRENT=_ZFS_AC_META_GETVAL([LT_Current]); ZFS_META_LT_REVISION=_ZFS_AC_META_GETVAL([LT_Revision]); diff --git a/configure.ac b/configure.ac index db614084e37e..6fcc89044dd8 100644 --- a/configure.ac +++ b/configure.ac @@ -120,8 +120,10 @@ AC_CONFIG_FILES([ cmd/dbufstat/Makefile cmd/arc_summary/Makefile cmd/zed/Makefile + cmd/zed/zed.d/Makefile cmd/raidz_test/Makefile cmd/zgenhostid/Makefile + cmd/zvol_wait/Makefile contrib/Makefile contrib/bash_completion.d/Makefile contrib/dracut/Makefile @@ -133,6 +135,7 @@ AC_CONFIG_FILES([ contrib/initramfs/scripts/local-top/Makefile contrib/pyzfs/Makefile contrib/pyzfs/setup.py + contrib/zcp/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile @@ -271,6 +274,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/cli_user/zfs_list/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile + tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile tests/zfs-tests/tests/functional/compression/Makefile tests/zfs-tests/tests/functional/cp_files/Makefile tests/zfs-tests/tests/functional/ctime/Makefile @@ -319,6 +323,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/rename_dirs/Makefile tests/zfs-tests/tests/functional/replacement/Makefile tests/zfs-tests/tests/functional/reservation/Makefile + tests/zfs-tests/tests/functional/resilver/Makefile tests/zfs-tests/tests/functional/rootpool/Makefile tests/zfs-tests/tests/functional/rsend/Makefile tests/zfs-tests/tests/functional/scrub_mirror/Makefile @@ -326,6 +331,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/snapshot/Makefile tests/zfs-tests/tests/functional/snapused/Makefile tests/zfs-tests/tests/functional/sparse/Makefile + tests/zfs-tests/tests/functional/suid/Makefile tests/zfs-tests/tests/functional/alloc_class/Makefile tests/zfs-tests/tests/functional/threadsappend/Makefile tests/zfs-tests/tests/functional/tmpfile/Makefile diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 81926a83ee69..9a82f82ee387 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = bash_completion.d dracut initramfs pyzfs -DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs +SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp +DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp diff --git a/contrib/dracut/90zfs/mount-zfs.sh.in b/contrib/dracut/90zfs/mount-zfs.sh.in index 23f7e3e295ed..73300a9b6633 100755 --- a/contrib/dracut/90zfs/mount-zfs.sh.in +++ b/contrib/dracut/90zfs/mount-zfs.sh.in @@ -62,11 +62,15 @@ if import_pool "${ZFS_POOL}" ; then # if the root dataset has encryption enabled ENCRYPTIONROOT="$(zfs get -H -o value encryptionroot "${ZFS_DATASET}")" if ! [ "${ENCRYPTIONROOT}" = "-" ]; then - # decrypt them - ask_for_password \ - --tries 5 \ - --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}: " \ - --cmd "zfs load-key '${ENCRYPTIONROOT}'" + KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")" + # if the key needs to be loaded + if [ "$KEYSTATUS" = "unavailable" ]; then + # decrypt them + ask_for_password \ + --tries 5 \ + --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}: " \ + --cmd "zfs load-key '${ENCRYPTIONROOT}'" + fi fi fi # Let us tell the initrd to run on shutdown. 
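The dracut change above reduces to a keystatus guard around the existing prompt; stripped of the plumbing it is just the following (the dataset name is an example):

```sh
# Prompt for a passphrase only when the key is genuinely not loaded:
ENCRYPTIONROOT="rpool/ROOT"	# example encryption root
KEYSTATUS="$(zfs get -H -o value keystatus "$ENCRYPTIONROOT")"
if [ "$KEYSTATUS" = "unavailable" ]; then
	zfs load-key "$ENCRYPTIONROOT"
fi
```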
diff --git a/contrib/dracut/90zfs/zfs-lib.sh.in b/contrib/dracut/90zfs/zfs-lib.sh.in index 23c07af9e86f..44021c6e5fc1 100755 --- a/contrib/dracut/90zfs/zfs-lib.sh.in +++ b/contrib/dracut/90zfs/zfs-lib.sh.in @@ -144,7 +144,7 @@ ask_for_password() { { flock -s 9; # Prompt for password with plymouth, if installed and running. - if whereis plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then + if type plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then plymouth ask-for-password \ --prompt "$ply_prompt" --number-of-tries="$ply_tries" \ --command="$ply_cmd" diff --git a/contrib/dracut/90zfs/zfs-load-key.sh.in b/contrib/dracut/90zfs/zfs-load-key.sh.in index 9e7adfc79786..85e55c51bfa4 100755 --- a/contrib/dracut/90zfs/zfs-load-key.sh.in +++ b/contrib/dracut/90zfs/zfs-load-key.sh.in @@ -25,23 +25,34 @@ while true; do done # run this after import as zfs-import-cache/scan service is confirmed good +# we do not overwrite the ${root} variable, but create a new one, BOOTFS, to hold the dataset if [ "${root}" = "zfs:AUTO" ] ; then - root="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')" + BOOTFS="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')" else - root="${root##zfs:}" - root="${root##ZFS=}" + BOOTFS="${root##zfs:}" + BOOTFS="${BOOTFS##ZFS=}" fi # if pool encryption is active and the zfs command understands '-o encryption' -if [ "$(zpool list -H -o feature@encryption $(echo "${root}" | awk -F\/ '{print $1}'))" = 'active' ]; then +if [ "$(zpool list -H -o feature@encryption $(echo "${BOOTFS}" | awk -F\/ '{print $1}'))" = 'active' ]; then # if the root dataset has encryption enabled - ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${root}") + ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${BOOTFS}") + # where the key is stored (in a file or loaded via prompt) + KEYLOCATION=$(zfs get -H -o value keylocation "${ENCRYPTIONROOT}") if ! [ "${ENCRYPTIONROOT}" = "-" ]; then - # decrypt them - TRY_COUNT=5 - while [ $TRY_COUNT -gt 0 ]; do - systemd-ask-password "Encrypted ZFS password for ${root}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break - TRY_COUNT=$((TRY_COUNT - 1)) - done + KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")" + # continue only if the key needs to be loaded + [ "$KEYSTATUS" = "unavailable" ] || exit 0 + # if key is stored in a file, do not prompt + if ! 
[ "${KEYLOCATION}" = "prompt" ]; then + zfs load-key "${ENCRYPTIONROOT}" + else + # decrypt them + TRY_COUNT=5 + while [ $TRY_COUNT -gt 0 ]; do + systemd-ask-password "Encrypted ZFS password for ${BOOTFS}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break + TRY_COUNT=$((TRY_COUNT - 1)) + done + fi fi fi diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index 87ec7a86f5ac..52bdeb2afe54 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -1,4 +1,4 @@ -initrddir = $(datarootdir)/initramfs-tools +initrddir = /usr/share/initramfs-tools initrd_SCRIPTS = \ conf.d/zfs conf-hooks.d/zfs hooks/zfs scripts/zfs scripts/local-top/zfs @@ -6,18 +6,29 @@ initrd_SCRIPTS = \ SUBDIRS = hooks scripts EXTRA_DIST = \ + $(top_srcdir)/etc/init.d/zfs \ + $(top_srcdir)/etc/init.d/zfs-functions \ $(top_srcdir)/contrib/initramfs/conf.d/zfs \ $(top_srcdir)/contrib/initramfs/conf-hooks.d/zfs \ $(top_srcdir)/contrib/initramfs/README.initramfs.markdown +$(top_srcdir)/etc/init.d/zfs $(top_srcdir)/etc/init.d/zfs-functions: + $(MAKE) -C $(top_srcdir)/etc/init.d zfs zfs-functions + install-initrdSCRIPTS: $(EXTRA_DIST) - for d in conf.d conf-hooks.d hooks scripts scripts/local-top; do \ - $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ - cp $(top_srcdir)/contrib/initramfs/$$d/zfs \ - $(DESTDIR)$(initrddir)/$$d/; \ + for d in conf.d conf-hooks.d scripts/local-top; do \ + $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ + cp $(top_srcdir)/contrib/initramfs/$$d/zfs \ + $(DESTDIR)$(initrddir)/$$d/; \ + done + for d in hooks scripts; do \ + $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ + cp $(top_builddir)/contrib/initramfs/$$d/zfs \ + $(DESTDIR)$(initrddir)/$$d/; \ done - if [ -f etc/init.d/zfs ]; then \ - $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ - cp $(top_srcdir)/etc/init.d/zfs \ + $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ + cp $(top_builddir)/etc/init.d/zfs \ $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ - fi + $(MKDIR_P) $(DESTDIR)$(sysconfdir)/zfs; \ + cp $(top_builddir)/etc/init.d/zfs-functions \ + $(DESTDIR)$(sysconfdir)/zfs/ diff --git a/contrib/initramfs/conf-hooks.d/zfs b/contrib/initramfs/conf-hooks.d/zfs index 29950cac04bb..b86d36223e3c 100644 --- a/contrib/initramfs/conf-hooks.d/zfs +++ b/contrib/initramfs/conf-hooks.d/zfs @@ -1,2 +1,9 @@ # Force the inclusion of Busybox in the initramfs. BUSYBOX=y + +# Setup the keyboard mapping so passphrases can be entered correctly. +KEYMAP=y + +# Require the plymouth script to guarantee working video for the passphrase +# prompting. +FRAMEBUFFER=y diff --git a/contrib/initramfs/hooks/Makefile.am b/contrib/initramfs/hooks/Makefile.am index c866b4fb6cd7..1735872c29b7 100644 --- a/contrib/initramfs/hooks/Makefile.am +++ b/contrib/initramfs/hooks/Makefile.am @@ -1,4 +1,4 @@ -hooksdir = $(datarootdir)/initramfs-tools/hooks +hooksdir = /usr/share/initramfs-tools/hooks hooks_SCRIPTS = \ zfs diff --git a/contrib/initramfs/hooks/zfs.in b/contrib/initramfs/hooks/zfs.in index e35354141d81..15f23c908b23 100755 --- a/contrib/initramfs/hooks/zfs.in +++ b/contrib/initramfs/hooks/zfs.in @@ -4,16 +4,18 @@ # # This hook installs udev rules for ZoL. -PREREQ="zdev" +PREREQ="udev" # These prerequisites are provided by the zfsutils package. The zdb utility is # not strictly required, but it can be useful at the initramfs recovery prompt. 
COPY_EXEC_LIST="@sbindir@/zdb @sbindir@/zpool @sbindir@/zfs" COPY_EXEC_LIST="$COPY_EXEC_LIST @mounthelperdir@/mount.zfs @udevdir@/vdev_id" +COPY_EXEC_LIST="$COPY_EXEC_LIST @udevdir@/zvol_id" COPY_FILE_LIST="/etc/hostid @sysconfdir@/zfs/zpool.cache" -COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/default/zfs" +COPY_FILE_LIST="$COPY_FILE_LIST @DEFAULT_INITCONF_DIR@/zfs" COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/zfs/zfs-functions" COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/zfs/vdev_id.conf" +COPY_FILE_LIST="$COPY_FILE_LIST @udevruledir@/60-zvol.rules" COPY_FILE_LIST="$COPY_FILE_LIST @udevruledir@/69-vdev.rules" # These prerequisites are provided by the base system. diff --git a/contrib/initramfs/scripts/Makefile.am b/contrib/initramfs/scripts/Makefile.am index a550311cd744..12c2641b80cc 100644 --- a/contrib/initramfs/scripts/Makefile.am +++ b/contrib/initramfs/scripts/Makefile.am @@ -1,4 +1,4 @@ -scriptsdir = $(datarootdir)/initramfs-tools/scripts +scriptsdir = /usr/share/initramfs-tools/scripts scripts_DATA = \ zfs diff --git a/contrib/initramfs/scripts/local-top/Makefile.am b/contrib/initramfs/scripts/local-top/Makefile.am index 88aa2d4ffa66..c820325947b0 100644 --- a/contrib/initramfs/scripts/local-top/Makefile.am +++ b/contrib/initramfs/scripts/local-top/Makefile.am @@ -1,3 +1,3 @@ -localtopdir = $(datarootdir)/initramfs-tools/scripts/local-top +localtopdir = /usr/share/initramfs-tools/scripts/local-top EXTRA_DIST = zfs diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index 36b7f436c1f1..4bbdf53a77d7 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -78,7 +78,7 @@ find_rootfs() { local pool="$1" - # If 'POOL_IMPORTED' isn't set, no pool imported and therefor + # If 'POOL_IMPORTED' isn't set, no pool imported and therefore # we won't be able to find a root fs. [ -z "${POOL_IMPORTED}" ] && return 1 @@ -135,7 +135,7 @@ get_pools() # Get the base list of available pools. available_pools=$(find_pools "$ZPOOL" import) - # Just in case - seen it happen (that a pool isn't visable/found + # Just in case - seen it happen (that a pool isn't visible/found # with a simple "zpool import" but only when using the "-d" # option or setting ZPOOL_IMPORT_PATH). if [ -d "/dev/disk/by-id" ] @@ -401,7 +401,7 @@ mount_fs() return 0 } -# Unlock a ZFS native crypted filesystem. +# Unlock a ZFS native encrypted filesystem. decrypt_fs() { local fs="$1" @@ -410,30 +410,43 @@ decrypt_fs() if [ "$(zpool list -H -o feature@encryption $(echo "${fs}" | awk -F\/ '{print $1}'))" = 'active' ]; then # Determine dataset that holds key for root dataset - ENCRYPTIONROOT=$(${ZFS} get -H -o value encryptionroot "${fs}") - DECRYPT_CMD="${ZFS} load-key '${ENCRYPTIONROOT}'" + ENCRYPTIONROOT="$(get_fs_value "${fs}" encryptionroot)" + KEYLOCATION="$(get_fs_value "${ENCRYPTIONROOT}" keylocation)" # If root dataset is encrypted... if ! [ "${ENCRYPTIONROOT}" = "-" ]; then + KEYSTATUS="$(get_fs_value "${ENCRYPTIONROOT}" keystatus)" + # Continue only if the key needs to be loaded + [ "$KEYSTATUS" = "unavailable" ] || return 0 + TRY_COUNT=3 + + # If key is stored in a file, do not prompt + if ! 
[ "${KEYLOCATION}" = "prompt" ]; then + $ZFS load-key "${ENCRYPTIONROOT}" # Prompt with plymouth, if active - if [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then - plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" \ - --number-of-tries="3" \ - --command="${DECRYPT_CMD}" + elif [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then + while [ $TRY_COUNT -gt 0 ]; do + plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \ + $ZFS load-key "${ENCRYPTIONROOT}" && break + TRY_COUNT=$((TRY_COUNT - 1)) + done # Prompt with systemd, if active elif [ -e /run/systemd/system ]; then - TRY_COUNT=3 while [ $TRY_COUNT -gt 0 ]; do systemd-ask-password "Encrypted ZFS password for ${ENCRYPTIONROOT}" --no-tty | \ - ${DECRYPT_CMD} && break + $ZFS load-key "${ENCRYPTIONROOT}" && break TRY_COUNT=$((TRY_COUNT - 1)) done # Prompt with ZFS tty, otherwise else - eval "${DECRYPT_CMD}" + # Setting "printk" temporarily to "7" will allow prompt even if kernel option "quiet" + storeprintk="$(awk '{print $1}' /proc/sys/kernel/printk)" + echo 7 > /proc/sys/kernel/printk + $ZFS load-key "${ENCRYPTIONROOT}" + echo "$storeprintk" > /proc/sys/kernel/printk fi fi fi @@ -606,7 +619,7 @@ setup_snapshot_booting() if ! grep -qiE '(^|[^\\](\\\\)* )(rollback)=(on|yes|1)( |$)' /proc/cmdline then # If the destination dataset for the clone - # already exists, destroy it. Recursivly + # already exists, destroy it. Recursively if [ $(get_fs_value "${rootfs}_${snapname}" type) ]; then filesystems=$("${ZFS}" list -oname -tfilesystem -H \ -r -Sname "${ZFS_BOOTFS}") @@ -616,7 +629,7 @@ setup_snapshot_booting() fi fi - # Get all snapshots, recursivly (might need to clone /usr, /var etc + # Get all snapshots, recursively (might need to clone /usr, /var etc # as well). for s in $("${ZFS}" list -H -oname -tsnapshot -r "${rootfs}" | \ grep "${snapname}") @@ -843,7 +856,7 @@ mountroot() # Strip 'zfs:' and 'ZFS='. ZFS_BOOTFS="${ROOT#*[:=]}" - # Stip everything after the first slash. + # Strip everything after the first slash. ZFS_RPOOL="${ZFS_BOOTFS%%/*}" fi @@ -878,24 +891,33 @@ mountroot() pool="$("${ZPOOL}" get name,guid -o name,value -H | \ awk -v pool="${ZFS_RPOOL}" '$2 == pool { print $1 }')" if [ -n "$pool" ]; then - ZFS_BOOTFS="${pool}/${ZFS_BOOTFS#*/}" + # If $ZFS_BOOTFS contains guid, replace the guid portion with $pool + ZFS_BOOTFS=$(echo "$ZFS_BOOTFS" | \ + sed -e "s/$("${ZPOOL}" get guid -o value "$pool" -H)/$pool/g") ZFS_RPOOL="${pool}" fi - # Set elevator=noop on the root pool's vdevs' disks. ZFS already - # does this for wholedisk vdevs (for all pools), so this is only - # important for partitions. + # Set the no-op scheduler on the disks containing the vdevs of + # the root pool. For single-queue devices, this scheduler is + # "noop", for multi-queue devices, it is "none". + # ZFS already does this for wholedisk vdevs (for all pools), so this + # is only important for partitions. 
"${ZPOOL}" status -L "${ZFS_RPOOL}" 2> /dev/null | awk '/^\t / && !/(mirror|raidz)/ { dev=$1; sub(/[0-9]+$/, "", dev); print dev }' | - while read i + while read -r i do - if grep -sq noop /sys/block/$i/queue/scheduler + SCHEDULER=/sys/block/$i/queue/scheduler + if [ -e "${SCHEDULER}" ] then - echo noop > "/sys/block/$i/queue/scheduler" + # Query to see what schedulers are available + case "$(cat "${SCHEDULER}")" in + *noop*) echo noop > "${SCHEDULER}" ;; + *none*) echo none > "${SCHEDULER}" ;; + esac fi done diff --git a/contrib/pyzfs/Makefile.am b/contrib/pyzfs/Makefile.am index 1549bf237932..fa1bb32ce2eb 100644 --- a/contrib/pyzfs/Makefile.am +++ b/contrib/pyzfs/Makefile.am @@ -24,7 +24,7 @@ all-local: # files are later created by manually loading the Python modules. # install-exec-local: - $(PYTHON) $(srcdir)/setup.py install \ + $(PYTHON) $(builddir)/setup.py install \ --prefix $(prefix) \ --root $(DESTDIR)/ \ --install-lib $(pythonsitedir) \ diff --git a/contrib/pyzfs/docs/source/conf.py b/contrib/pyzfs/docs/source/conf.py index 4ffd7c93e5bd..4bbb938b6296 100644 --- a/contrib/pyzfs/docs/source/conf.py +++ b/contrib/pyzfs/docs/source/conf.py @@ -291,7 +291,7 @@ ####################### # Neutralize effects of function wrapping on documented signatures. -# The affected signatures could be explcitly placed into the +# The affected signatures could be explicitly placed into the # documentation (either in .rst files or as a first line of a # docstring). import functools diff --git a/contrib/pyzfs/libzfs_core/_libzfs_core.py b/contrib/pyzfs/libzfs_core/_libzfs_core.py index 5c8a1f5e690a..ed3ea3201c2a 100644 --- a/contrib/pyzfs/libzfs_core/_libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/_libzfs_core.py @@ -300,7 +300,7 @@ def lzc_destroy_snaps(snaps, defer): Typical error is :exc:`SnapshotIsCloned` if `defer` is `False`. The snapshot names are validated quite loosely and invalid names are - typically ignored as nonexisiting snapshots. + typically ignored as nonexisting snapshots. A snapshot name referring to a filesystem that doesn't exist is ignored. @@ -470,7 +470,7 @@ def lzc_hold(holds, fd=None): Holds for snapshots which don't exist will be skipped and have an entry added to the return value, but will not cause an overall failure. No exceptions is raised if all holds, for snapshots that existed, were - succesfully created. + successfully created. Otherwise :exc:`.HoldFailure` exception is raised and no holds will be created. :attr:`.HoldFailure.errors` may contain a single element for an error that @@ -654,7 +654,7 @@ def lzc_send_space(snapname, fromsnap=None, flags=None): should be done. :param fromsnap: the optional starting snapshot name. If not `None` then an incremental stream size is estimated, otherwise - a full stream is esimated. + a full stream is estimated. :type fromsnap: `bytes` or `None` :param flags: the flags that control what enhanced features can be used in the stream. 
@@ -1178,11 +1178,11 @@ def receive_header(fd): the type of the dataset for which the stream has been created (volume, filesystem) ''' - # read sizeof(dmu_replay_record_t) bytes directly into the memort backing + # read sizeof(dmu_replay_record_t) bytes directly into the memory backing # 'record' record = _ffi.new("dmu_replay_record_t *") _ffi.buffer(record)[:] = os.read(fd, _ffi.sizeof(record[0])) - # get drr_begin member and its representation as a Pythn dict + # get drr_begin member and its representation as a Python dict drr_begin = record.drr_u.drr_begin header = {} for field, descr in _ffi.typeof(drr_begin).fields: @@ -1704,7 +1704,7 @@ def lzc_set_props(name, prop, val): # As the extended API is not committed yet, the names of the new interfaces # are not settled down yet. # It's not clear if atomically setting multiple properties is an achievable -# goal and an interface acting on mutiple entities must do so atomically +# goal and an interface acting on multiple entities must do so atomically # by convention. # Being able to set a single property at a time is sufficient for ClusterHQ. lzc_set_prop = lzc_set_props @@ -1741,7 +1741,7 @@ def lzc_list(name, options): Absence of this option implies all types. The first of the returned file descriptors can be used to - read the listing in a binary encounded format. The data is + read the listing in a binary encoded format. The data is a series of variable sized records each starting with a fixed size header, the header is followed by a serialized ``nvlist``. Each record describes a single element and contains the element's diff --git a/contrib/pyzfs/libzfs_core/_nvlist.py b/contrib/pyzfs/libzfs_core/_nvlist.py index fe4239a3c06e..dc6d820bdea3 100644 --- a/contrib/pyzfs/libzfs_core/_nvlist.py +++ b/contrib/pyzfs/libzfs_core/_nvlist.py @@ -113,7 +113,7 @@ def packed_nvlist_out(packed_nvlist, packed_size): :param bytes packed_nvlist: packed nvlist_t. :param int packed_size: nvlist_t packed size. - :return: an `dict` of values representing the data containted by nvlist_t. + :return: an `dict` of values representing the data contained by nvlist_t. :rtype: dict """ props = {} diff --git a/contrib/pyzfs/libzfs_core/exceptions.py b/contrib/pyzfs/libzfs_core/exceptions.py index f465cd3d9309..f8a775433b3c 100644 --- a/contrib/pyzfs/libzfs_core/exceptions.py +++ b/contrib/pyzfs/libzfs_core/exceptions.py @@ -77,7 +77,7 @@ def __str__(self): ZFSError.__str__(self), len(self.errors), self.suppressed_count) def __repr__(self): - return "%s(%r, %r, errors=%r, supressed=%r)" % ( + return "%s(%r, %r, errors=%r, suppressed=%r)" % ( self.__class__.__name__, self.errno, self.message, self.errors, self.suppressed_count) @@ -372,7 +372,7 @@ def __init__(self, name): class QuotaExceeded(ZFSError): errno = errno.EDQUOT - message = "Quouta exceeded" + message = "Quota exceeded" def __init__(self, name): self.name = name diff --git a/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py b/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py index 25f20a4aeebc..8279cefc46db 100644 --- a/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py @@ -1913,7 +1913,7 @@ def test_recv_incremental(self): filecmp.cmp( os.path.join(mnt1, name), os.path.join(mnt2, name), False)) - # This test case fails unless unless a patch from + # This test case fails unless a patch from # https://clusterhq.atlassian.net/browse/ZFS-20 # is applied to libzfs_core, otherwise it succeeds. 
@unittest.skip("fails with unpatched libzfs_core") @@ -2160,7 +2160,7 @@ def test_recv_incremental_non_clone_but_set_origin(self): with streams(srcfs, src1, src2) as (_, (full, incr)): lzc.lzc_receive(dst1, full.fileno()) lzc.lzc_snapshot([dst_snap]) - # becase cannot receive incremental and set origin on a non-clone + # because cannot receive incremental and set origin on a non-clone with self.assertRaises(lzc_exc.BadStream): lzc.lzc_receive(dst2, incr.fileno(), origin=dst1) @@ -2375,7 +2375,7 @@ def test_force_recv_full_existing_modified_mounted_fs(self): for i in range(1024): f.write(b'x' * 1024) lzc.lzc_receive(dst, stream.fileno(), force=True) - # The temporary file dissappears and any access, even close(), + # The temporary file disappears and any access, even close(), # results in EIO. self.assertFalse(os.path.exists(f.name)) with self.assertRaises(IOError): @@ -2462,7 +2462,7 @@ def test_force_recv_incremental_modified_mounted_fs(self): for i in range(1024): f.write(b'x' * 1024) lzc.lzc_receive(dst2, incr.fileno(), force=True) - # The temporary file dissappears and any access, even close(), + # The temporary file disappears and any access, even close(), # results in EIO. self.assertFalse(os.path.exists(f.name)) with self.assertRaises(IOError): diff --git a/contrib/zcp/Makefile.am b/contrib/zcp/Makefile.am new file mode 100644 index 000000000000..e6a777ad7ba7 --- /dev/null +++ b/contrib/zcp/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST = autosnap.lua diff --git a/contrib/zcp/autosnap.lua b/contrib/zcp/autosnap.lua new file mode 100644 index 000000000000..d9ae32ce458a --- /dev/null +++ b/contrib/zcp/autosnap.lua @@ -0,0 +1,75 @@ +-- Recursively snapshot every dataset with a given property +-- +-- Usage: zfs program autosnap.lua -- [-n] [-p ] + +results = {} + +args = ... +argv = args["argv"] +usage = [[ + + +usage: zfs program autosnap.lua -- [-n] [-p ] + + -n: performs checks only, does not take snapshots + -p : property to check. [default: com.sun:auto-snapshot] + : root snapshot to create [example: tank/data@backup] +]] + +property = "com.sun:auto-snapshot" +noop = false +root_snap = nil + +for i, arg in ipairs(argv) do + if arg == "-n" then + noop = true + elseif arg == "-p" then + elseif argv[i-1] == "-p" then + property = arg + else + root_snap = arg + end +end + +if root_snap == nil or property == nil then + error(usage) +end + +root_ds_name = "" +snap_name = "" +for i = 1, #root_snap do + if root_snap:sub(i, i) == "@" then + root_ds_name = root_snap:sub(1, i-1) + snap_name = root_snap:sub(i+1, root_snap:len()) + end +end + +function auto_snap(root) + auto, source = zfs.get_prop(root, property) + if auto == "true" then + ds_snap_name = root .. "@" .. snap_name + err = 0 + if noop then + err = zfs.check.snapshot(ds_snap_name) + else + err = zfs.sync.snapshot(ds_snap_name) + end + results[ds_snap_name] = err + end + for child in zfs.list.children(root) do + auto_snap(child) + end +end + +auto_snap(root_ds_name) +err_txt = "" +for ds, err in pairs(results) do + if err ~= 0 then + err_txt = err_txt .. "failed to create " .. ds .. ": " .. err .. "\n" + end +end +if err_txt ~= "" then + error(err_txt) +end + +return results diff --git a/etc/init.d/zfs-functions.in b/etc/init.d/zfs-functions.in index 490503e91391..043f1b07398e 100644 --- a/etc/init.d/zfs-functions.in +++ b/etc/init.d/zfs-functions.in @@ -72,7 +72,7 @@ elif type einfo > /dev/null 2>&1 ; then # zfs_log_progress_msg() { echo -n "$1"; } zfs_log_progress_msg() { echo -n; } else - # Unknown - simple substitues. 
+ # Unknown - simple substitutes. zfs_log_begin_msg() { echo -n "$1"; } zfs_log_end_msg() { ret=$1 @@ -283,7 +283,7 @@ checksystem() # Called with zfs=(off|no|0) - bail because we don't # want anything import, mounted or shared. # HOWEVER, only do this if we're called at the boot up - # (from init), not if we're running interactivly (as in + # (from init), not if we're running interactively (as in # from the shell - we know what we're doing). [ -n "$init" ] && exit 3 fi @@ -294,13 +294,6 @@ checksystem() # Just make sure that /dev/zfs is created. udev_trigger - if ! [ "$(uname -m)" = "x86_64" ]; then - echo "Warning: You're not running 64bit. Currently native zfs in"; - echo " Linux is only supported and tested on 64bit."; - # should we break here? People doing this should know what they - # do, thus i'm not breaking here. - fi - return 0 } @@ -373,10 +366,13 @@ read_mtab() in_mtab() { - local fs="$(echo "$1" | sed 's,/,_,g')" + local mntpnt="$1" + # Remove 'unwanted' characters. + mntpnt=$(printf '%b\n' "$mntpnt" | sed -e 's,/,,g' \ + -e 's,-,,g' -e 's,\.,,g' -e 's, ,,g') local var - var="$(eval echo MTAB_$fs)" + var="$(eval echo MTAB_$mntpnt)" [ "$(eval echo "$""$var")" != "" ] return "$?" } diff --git a/etc/init.d/zfs-import.in b/etc/init.d/zfs-import.in index 420d2e8a7a4e..47c957baac4b 100644 --- a/etc/init.d/zfs-import.in +++ b/etc/init.d/zfs-import.in @@ -90,7 +90,7 @@ do_import_all_visible() already_imported=$(find_pools "$ZPOOL" list -H -oname) available_pools=$(find_pools "$ZPOOL" import) - # Just in case - seen it happen (that a pool isn't visable/found + # Just in case - seen it happen (that a pool isn't visible/found # with a simple "zpool import" but only when using the "-d" # option or setting ZPOOL_IMPORT_PATH). if [ -d "/dev/disk/by-id" ] @@ -187,7 +187,7 @@ do_import_all_visible() # Needs to be exported for "zpool" to catch it. [ -n "$ZPOOL_IMPORT_PATH" ] && export ZPOOL_IMPORT_PATH - # Mount all availible pools (except those set in ZFS_POOL_EXCEPTIONS. + # Mount all available pools (except those set in ZFS_POOL_EXCEPTIONS. # # If not interactive (run from init - variable init='/sbin/init') # we get ONE line for all pools being imported, with just a dot diff --git a/etc/init.d/zfs.in b/etc/init.d/zfs.in index 7998569b2c2d..42fc1161c725 100644 --- a/etc/init.d/zfs.in +++ b/etc/init.d/zfs.in @@ -1,5 +1,11 @@ # ZoL userland configuration. +# NOTE: This file is intended for sysv init and initramfs. +# Changing some of these settings may not make any difference on +# systemd-based setup, e.g. setting ZFS_MOUNT=no will not prevent systemd +# from launching zfs-mount.service during boot. +# See: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=901436 + # To enable a boolean setting, set it to yes, on, true, or 1. # Anything else will be interpreted as unset. @@ -91,6 +97,10 @@ MOUNT_EXTRA_OPTIONS="" # Only applicable for Debian GNU/Linux {dkms,initramfs}. ZFS_DKMS_ENABLE_DEBUG='no' +# Build kernel modules with the --enable-debuginfo switch? +# Only applicable for Debian GNU/Linux {dkms,initramfs}. +ZFS_DKMS_ENABLE_DEBUGINFO='no' + # Keep debugging symbols in kernel modules? # Only applicable for Debian GNU/Linux {dkms,initramfs}. 
ZFS_DKMS_DISABLE_STRIP='no' diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 5428eb25d92c..850396fb6c2e 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -71,12 +71,70 @@ process_line() { p_readonly="${8}" p_setuid="${9}" p_nbmand="${10}" + p_encroot="${11}" + p_keyloc="${12}" + + # Minimal pre-requisites to mount a ZFS dataset + wants="zfs-import.target" + + # Handle encryption + if [ -n "${p_encroot}" ] && + [ "${p_encroot}" != "-" ] ; then + keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service" + if [ "${p_encroot}" = "${dataset}" ] ; then + pathdep="" + if [ "${p_keyloc%%://*}" = "file" ] ; then + pathdep="RequiresMountsFor='${p_keyloc#file://}'" + keyloadcmd="@sbindir@/zfs load-key '${dataset}'" + elif [ "${p_keyloc}" = "prompt" ] ; then + keyloadcmd="/bin/sh -c 'set -eu;"\ +"keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";"\ +"[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;"\ +"count=0;"\ +"while [ \$\$count -lt 3 ];do"\ +" systemd-ask-password --id=\"zfs:${dataset}\""\ +" \"Enter passphrase for ${dataset}:\"|"\ +" @sbindir@/zfs load-key \"${dataset}\" && exit 0;"\ +" count=\$\$((count + 1));"\ +"done;"\ +"exit 1'" + else + printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ + "${dataset}" >/dev/kmsg + fi + + # Generate the key-load .service unit + cat > "${dest_norm}/${keyloadunit}" << EOF +# Automatically generated by zfs-mount-generator + +[Unit] +Description=Load ZFS key for ${dataset} +SourcePath=${cachefile} +Documentation=man:zfs-mount-generator(8) +DefaultDependencies=no +Wants=${wants} +After=${wants} +${pathdep} + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=${keyloadcmd} +ExecStop=@sbindir@/zfs unload-key '${dataset}' +EOF + fi + # Update the dependencies for the mount file to require the + # key-loading unit. + wants="${wants} ${keyloadunit}" + fi + + # Prepare the .mount unit # Check for canmount=off . if [ "${p_canmount}" = "off" ] ; then return elif [ "${p_canmount}" = "noauto" ] ; then - # Don't let a noauto marked mountpoint block an "auto" market mountpoint + # Don't let a noauto marked mountpoint block an "auto" marked mountpoint return elif [ "${p_canmount}" = "on" ] ; then : # This is OK @@ -175,6 +233,7 @@ process_line() { return fi + # Create the .mount unit file. # By ordering before zfs-mount.service, we avoid race conditions. 
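	# (Illustrative example: for an encryption root "tank/enc" the
	# escaped unit name above is "zfs-load-key-tank-enc.service";
	# because it is appended to "$wants", the .mount unit generated
	# below lists it in both Wants= and After=.)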
cat > "${dest_norm}/${mountfile}" << EOF # Automatically generated by zfs-mount-generator @@ -183,8 +242,8 @@ process_line() { SourcePath=${cachefile} Documentation=man:zfs-mount-generator(8) Before=local-fs.target zfs-mount.service -After=zfs-import.target -Wants=zfs-import.target +After=${wants} +Wants=${wants} [Mount] Where=${p_mountpoint} diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in index 884a69b5b683..e4056a92cd98 100644 --- a/etc/systemd/system/50-zfs.preset.in +++ b/etc/systemd/system/50-zfs.preset.in @@ -5,4 +5,5 @@ enable zfs-import.target enable zfs-mount.service enable zfs-share.service enable zfs-zed.service +enable zfs-volume-wait.service enable zfs.target diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index 1586209caa6d..130c6c757a59 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -7,7 +7,9 @@ systemdunit_DATA = \ zfs-import-scan.service \ zfs-mount.service \ zfs-share.service \ + zfs-volume-wait.service \ zfs-import.target \ + zfs-volumes.target \ zfs.target EXTRA_DIST = \ @@ -17,6 +19,8 @@ EXTRA_DIST = \ $(top_srcdir)/etc/systemd/system/zfs-mount.service.in \ $(top_srcdir)/etc/systemd/system/zfs-share.service.in \ $(top_srcdir)/etc/systemd/system/zfs-import.target.in \ + $(top_srcdir)/etc/systemd/system/zfs-volume-wait.service.in \ + $(top_srcdir)/etc/systemd/system/zfs-volumes.target.in \ $(top_srcdir)/etc/systemd/system/zfs.target.in \ $(top_srcdir)/etc/systemd/system/50-zfs.preset.in @@ -27,5 +31,9 @@ $(systemdunit_DATA) $(systemdpreset_DATA):%:%.in -e 's,@sysconfdir\@,$(sysconfdir),g' \ $< >'$@' +install-data-hook: + $(MKDIR_P) "$(DESTDIR)$(systemdunitdir)" + ln -sf /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-import.service" + distclean-local:: -$(RM) $(systemdunit_DATA) $(systemdpreset_DATA) diff --git a/etc/systemd/system/zfs-mount.service.in b/etc/systemd/system/zfs-mount.service.in index a18691a4680a..6507c0765d01 100644 --- a/etc/systemd/system/zfs-mount.service.in +++ b/etc/systemd/system/zfs-mount.service.in @@ -6,6 +6,7 @@ After=systemd-udev-settle.service After=zfs-import.target After=systemd-remount-fs.service Before=local-fs.target +Before=systemd-random-seed.service [Service] Type=oneshot diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in index 75ff6e946767..5f4ba411b3cd 100644 --- a/etc/systemd/system/zfs-share.service.in +++ b/etc/systemd/system/zfs-share.service.in @@ -5,6 +5,7 @@ After=nfs-server.service nfs-kernel-server.service After=smb.service Before=rpc-statd-notify.service Wants=zfs-mount.service +After=zfs-mount.service PartOf=nfs-server.service nfs-kernel-server.service PartOf=smb.service diff --git a/etc/systemd/system/zfs-volume-wait.service.in b/etc/systemd/system/zfs-volume-wait.service.in new file mode 100644 index 000000000000..75bd9fcdd56c --- /dev/null +++ b/etc/systemd/system/zfs-volume-wait.service.in @@ -0,0 +1,13 @@ +[Unit] +Description=Wait for ZFS Volume (zvol) links in /dev +DefaultDependencies=no +After=systemd-udev-settle.service +After=zfs-import.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=@bindir@/zvol_wait + +[Install] +WantedBy=zfs-volumes.target diff --git a/etc/systemd/system/zfs-volumes.target.in b/etc/systemd/system/zfs-volumes.target.in new file mode 100644 index 000000000000..5cb9a10f49c5 --- /dev/null +++ b/etc/systemd/system/zfs-volumes.target.in @@ -0,0 +1,7 @@ +[Unit] +Description=ZFS volumes are ready +After=zfs-volume-wait.service 
+Requires=zfs-volume-wait.service + +[Install] +WantedBy=zfs.target diff --git a/etc/zfs/vdev_id.conf.sas_direct.example b/etc/zfs/vdev_id.conf.sas_direct.example index 0a6f130cb2d9..d17ed149d89b 100644 --- a/etc/zfs/vdev_id.conf.sas_direct.example +++ b/etc/zfs/vdev_id.conf.sas_direct.example @@ -2,7 +2,7 @@ multipath no topology sas_direct phys_per_port 4 -# Additionally create /dev/by-enclousure/ symlinks for enclosure devices +# Additionally create /dev/by-enclosure/ symlinks for enclosure devices enclosure_symlinks yes # PCI_ID HBA PORT CHANNEL NAME diff --git a/include/libzfs.h b/include/libzfs.h index e2ec2d9bce7b..19c16b86a7a2 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright Joyent, Inc. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2016, Intel Corporation. * Copyright 2016 Nexenta Systems, Inc. @@ -147,6 +147,7 @@ typedef enum zfs_error { EZFS_NO_TRIM, /* no active trim */ EZFS_TRIM_NOTSUP, /* device does not support trim */ EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ + EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_UNKNOWN } zfs_error_t; @@ -424,7 +425,8 @@ typedef enum { extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, int name_flags); extern int zpool_upgrade(zpool_handle_t *, uint64_t); -extern int zpool_get_history(zpool_handle_t *, nvlist_t **); +extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *, + boolean_t *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, unsigned, int); extern int zpool_events_clear(libzfs_handle_t *, int *); @@ -686,7 +688,8 @@ extern int zfs_hold(zfs_handle_t *, const char *, const char *, extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); -extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); +extern uint64_t zvol_volsize_to_reservation(zpool_handle_t *, uint64_t, + nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); diff --git a/include/linux/Makefile.am b/include/linux/Makefile.am index efb49520e63c..2455759e8138 100644 --- a/include/linux/Makefile.am +++ b/include/linux/Makefile.am @@ -7,6 +7,7 @@ KERNEL_H = \ $(top_srcdir)/include/linux/blkdev_compat.h \ $(top_srcdir)/include/linux/utsname_compat.h \ $(top_srcdir)/include/linux/kmap_compat.h \ + $(top_srcdir)/include/linux/simd.h \ $(top_srcdir)/include/linux/simd_x86.h \ $(top_srcdir)/include/linux/simd_aarch64.h \ $(top_srcdir)/include/linux/mod_compat.h \ diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 084ea61ccc9a..ace461dc98f1 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -638,14 +638,6 @@ blk_queue_discard_secure(struct request_queue *q) #endif } -/* - * Default Linux IO Scheduler, - * Setting the scheduler to noop will allow the Linux IO scheduler to - * still perform front and back merging, while leaving the request - * ordering and prioritization to the ZFS IO scheduler. - */ -#define VDEV_SCHEDULER "noop" - /* * A common holder for vdev_bdev_open() is used to relax the exclusive open * semantics slightly. 
Internal vdev disk callers may pass VDEV_HOLDER to
diff --git a/include/linux/simd.h b/include/linux/simd.h
new file mode 100644
index 000000000000..bb5f0f02a9c4
--- /dev/null
+++ b/include/linux/simd.h
@@ -0,0 +1,42 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2019 Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SIMD_H
+#define	_SIMD_H
+
+#if defined(__x86)
+#include <linux/simd_x86.h>
+
+#elif defined(__aarch64__)
+#include <linux/simd_aarch64.h>
+#else
+
+#define	kfpu_allowed()		0
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+#endif
+#endif /* _SIMD_H */
diff --git a/include/linux/simd_aarch64.h b/include/linux/simd_aarch64.h
index 155ef6205599..7ba308d1543b 100644
--- a/include/linux/simd_aarch64.h
+++ b/include/linux/simd_aarch64.h
@@ -26,8 +26,11 @@
 * USER API:
 *
 * Kernel fpu methods:
- *	kfpu_begin()
- *	kfpu_end()
+ *	kfpu_allowed()
+ *	kfpu_begin()
+ *	kfpu_end()
+ *	kfpu_init()
+ *	kfpu_fini()
 */
#ifndef _SIMD_AARCH64_H
@@ -41,20 +44,20 @@
#if defined(_KERNEL)
#include <asm/neon.h>
-#define	kfpu_begin()		\
-{				\
-	kernel_neon_begin();	\
-}
-#define	kfpu_end()		\
-{				\
-	kernel_neon_end();	\
-}
+#define	kfpu_allowed()		1
+#define	kfpu_begin()		kernel_neon_begin()
+#define	kfpu_end()		kernel_neon_end()
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
#else
/*
 * fpu dummy methods for userspace
 */
-#define	kfpu_begin()	do {} while (0)
-#define	kfpu_end()	do {} while (0)
+#define	kfpu_allowed()		1
+#define	kfpu_begin()	do {} while (0)
+#define	kfpu_end()	do {} while (0)
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
#endif /* defined(_KERNEL) */
#endif /* __aarch64__ */
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
index 12cd7467788e..1bde1d7c9240 100644
--- a/include/linux/simd_x86.h
+++ b/include/linux/simd_x86.h
@@ -26,8 +26,11 @@
 * USER API:
 *
 * Kernel fpu methods:
- *	kfpu_begin()
- *	kfpu_end()
+ *	kfpu_allowed()
+ *	kfpu_begin()
+ *	kfpu_end()
+ *	kfpu_init()
+ *	kfpu_fini()
 *
 * SIMD support:
 *
 * Following functions should be called to determine whether CPU feature
 * is supported. All functions are usually implemented as macros which
@@ -37,31 +40,31 @@
 * all relevant feature test functions should be called.
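 *
 * Typical caller pattern (an illustrative sketch, not text from the
 * original header):
 *
 *	if (kfpu_allowed()) {
 *		kfpu_begin();
 *		(SIMD-accelerated code path)
 *		kfpu_end();
 *	}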
 *
 * Supported features:
- *	zfs_sse_available()
- *	zfs_sse2_available()
- *	zfs_sse3_available()
- *	zfs_ssse3_available()
- *	zfs_sse4_1_available()
- *	zfs_sse4_2_available()
+ *	zfs_sse_available()
+ *	zfs_sse2_available()
+ *	zfs_sse3_available()
+ *	zfs_ssse3_available()
+ *	zfs_sse4_1_available()
+ *	zfs_sse4_2_available()
 *
- *	zfs_avx_available()
- *	zfs_avx2_available()
+ *	zfs_avx_available()
+ *	zfs_avx2_available()
 *
- *	zfs_bmi1_available()
- *	zfs_bmi2_available()
+ *	zfs_bmi1_available()
+ *	zfs_bmi2_available()
 *
- *	zfs_avx512f_available()
- *	zfs_avx512cd_available()
- *	zfs_avx512er_available()
- *	zfs_avx512pf_available()
- *	zfs_avx512bw_available()
- *	zfs_avx512dq_available()
- *	zfs_avx512vl_available()
- *	zfs_avx512ifma_available()
- *	zfs_avx512vbmi_available()
+ *	zfs_avx512f_available()
+ *	zfs_avx512cd_available()
+ *	zfs_avx512er_available()
+ *	zfs_avx512pf_available()
+ *	zfs_avx512bw_available()
+ *	zfs_avx512dq_available()
+ *	zfs_avx512vl_available()
+ *	zfs_avx512ifma_available()
+ *	zfs_avx512vbmi_available()
 *
 * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers
- *	also add zfs_avx512vl_available() to feature check.
+ *	also add zfs_avx512vl_available() to feature check.
 */

#ifndef _SIMD_X86_H
@@ -82,6 +85,15 @@

#if defined(_KERNEL)

+/*
+ * Disable the WARN_ON_FPU() macro to prevent additional dependencies
+ * when providing the kfpu_* functions. Relevant warnings are included
+ * as appropriate and are unconditionally enabled.
+ */
+#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU)
+#undef CONFIG_X86_DEBUG_FPU
+#endif
+
#if defined(HAVE_KERNEL_FPU_API_HEADER)
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -90,33 +102,242 @@
#include <asm/xcr.h>
#endif

+/*
+ * The following cases are for kernels which export either the
+ * kernel_fpu_* or __kernel_fpu_* functions.
+ */
+#if defined(KERNEL_EXPORTS_X86_FPU)
+
+#define	kfpu_allowed()		1
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
#define	kfpu_begin()		\
-{	\
-	preempt_disable();	\
+{				\
+	preempt_disable();	\
	__kernel_fpu_begin();	\
}
-#define	kfpu_end()	\
-{	\
-	__kernel_fpu_end();	\
-	preempt_enable();	\
+#define	kfpu_end()		\
+{				\
+	__kernel_fpu_end();	\
+	preempt_enable();	\
}
+
#elif defined(HAVE_KERNEL_FPU)
-#define	kfpu_begin()	kernel_fpu_begin()
+#define	kfpu_begin()		kernel_fpu_begin()
#define	kfpu_end()		kernel_fpu_end()
+
#else
-/* Kernel doesn't export any kernel_fpu_* functions */
-#include <asm/fpu/internal.h>	/* For kernel xgetbv() */
-#define	kfpu_begin()	panic("This code should never run")
-#define	kfpu_end()	panic("This code should never run")
-#endif /* defined(HAVE_KERNEL_FPU) */
+/*
+ * This case is unreachable. When KERNEL_EXPORTS_X86_FPU is defined then
+ * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
+ */
+#error "Unreachable kernel configuration"
+#endif
+
+#else /* defined(KERNEL_EXPORTS_X86_FPU) */
+
+/*
+ * When the kernel_fpu_* symbols are unavailable then provide our own
+ * versions which allow the FPU to be safely used.
+ */
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+
+#include <linux/mm.h>
+
+extern union fpregs_state **zfs_kfpu_fpregs;
+
+/*
+ * Free the per-cpu FPU state allocated by kfpu_init().
+ */ +static inline void +kfpu_fini(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (zfs_kfpu_fpregs[cpu] != NULL) { + free_pages((unsigned long)zfs_kfpu_fpregs[cpu], + get_order(sizeof (union fpregs_state))); + } + } + + kfree(zfs_kfpu_fpregs); +} + +static inline int +kfpu_init(void) +{ + zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * + sizeof (union fpregs_state *), GFP_KERNEL); + if (zfs_kfpu_fpregs == NULL) + return (-ENOMEM); + + /* + * The fxsave and xsave operations require 16-/64-byte alignment of + * the target memory. Since kmalloc() provides no alignment + * guarantee instead use alloc_pages_node(). + */ + unsigned int order = get_order(sizeof (union fpregs_state)); + int cpu; + + for_each_possible_cpu(cpu) { + struct page *page = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL | __GFP_ZERO, order); + if (page == NULL) { + kfpu_fini(); + return (-ENOMEM); + } + + zfs_kfpu_fpregs[cpu] = page_address(page); + } + + return (0); +} + +#define kfpu_allowed() 1 +#define ex_handler_fprestore ex_handler_default + +/* + * FPU save and restore instructions. + */ +#define __asm __asm__ __volatile__ +#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr))) +#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr))) +#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr))) +#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) +#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr))) +#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr))) +#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \ + : : [addr] "m" (rval)); + +static inline void +kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + int err; + + low = mask; + hi = mask >> 32; + XSTATE_XSAVE(addr, low, hi, err); + WARN_ON_ONCE(err); +} + +static inline void +kfpu_save_fxsr(struct fxregs_state *addr) +{ + if (IS_ENABLED(CONFIG_X86_32)) + kfpu_fxsave(addr); + else + kfpu_fxsaveq(addr); +} + +static inline void +kfpu_save_fsave(struct fregs_state *addr) +{ + kfpu_fnsave(addr); +} + +static inline void +kfpu_begin(void) +{ + /* + * Preemption and interrupts must be disabled for the critical + * region where the FPU state is being modified. + */ + preempt_disable(); + local_irq_disable(); + + /* + * The current FPU registers need to be preserved by kfpu_begin() + * and restored by kfpu_end(). They are stored in a dedicated + * per-cpu variable, not in the task struct, this allows any user + * FPU state to be correctly preserved and restored. + */ + union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_save_xsave(&state->xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_save_fxsr(&state->fxsave); + } else { + kfpu_save_fsave(&state->fsave); + } +} + +static inline void +kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + XSTATE_XRESTORE(addr, low, hi); +} + +static inline void +kfpu_restore_fxsr(struct fxregs_state *addr) +{ + /* + * On AuthenticAMD K7 and K8 processors the fxrstor instruction only + * restores the _x87 FOP, FIP, and FDP registers when an exception + * is pending. Clean the _x87 state to force the restore. 
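+ * (kfpu_fxsr_clean() above implements that step: it executes fnclex,
+ * emms, and a dummy fildl so the x87 state is left in a known clean
+ * state before the fxrstor below runs.)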
+ */ + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) + kfpu_fxsr_clean(addr); + + if (IS_ENABLED(CONFIG_X86_32)) { + kfpu_fxrstor(addr); + } else { + kfpu_fxrstorq(addr); + } +} + +static inline void +kfpu_restore_fsave(struct fregs_state *addr) +{ + kfpu_frstor(addr); +} + +static inline void +kfpu_end(void) +{ + union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_restore_xsave(&state->xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_restore_fxsr(&state->fxsave); + } else { + kfpu_restore_fsave(&state->fsave); + } + + local_irq_enable(); + preempt_enable(); +} #else + +/* + * FPU support is unavailable. + */ +#define kfpu_allowed() 0 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) + +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ +#endif /* defined(KERNEL_EXPORTS_X86_FPU) */ + +#else /* defined(_KERNEL) */ /* - * fpu dummy methods for userspace + * FPU dummy methods for user space. */ -#define kfpu_begin() do {} while (0) -#define kfpu_end() do {} while (0) +#define kfpu_allowed() 1 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) #endif /* defined(_KERNEL) */ /* @@ -190,7 +411,7 @@ typedef struct cpuid_feature_desc { * Descriptions of supported instruction sets */ static const cpuid_feature_desc_t cpuid_features[] = { - [SSE] = {1U, 0U, 1U << 25, EDX }, + [SSE] = {1U, 0U, 1U << 25, EDX }, [SSE2] = {1U, 0U, 1U << 26, EDX }, [SSE3] = {1U, 0U, 1U << 0, ECX }, [SSSE3] = {1U, 0U, 1U << 9, ECX }, @@ -287,7 +508,6 @@ CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); #endif /* !defined(_KERNEL) */ - /* * Detect register set support */ @@ -298,7 +518,7 @@ __simd_state_enabled(const uint64_t state) uint64_t xcr0; #if defined(_KERNEL) -#if defined(X86_FEATURE_OSXSAVE) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_OSXSAVE) has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE); #else has_osxsave = B_FALSE; @@ -328,11 +548,7 @@ static inline boolean_t zfs_sse_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse()); #endif @@ -345,11 +561,7 @@ static inline boolean_t zfs_sse2_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM2)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse2()); #endif @@ -362,11 +574,7 @@ static inline boolean_t zfs_sse3_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM3)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse3()); #endif @@ -379,11 +587,7 @@ static inline boolean_t zfs_ssse3_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_SSSE3)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_ssse3()); #endif @@ -396,11 +600,7 @@ static inline boolean_t zfs_sse4_1_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM4_1)); -#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse4_1()); #endif @@ -413,11 +613,7 @@ static inline boolean_t zfs_sse4_2_available(void) { #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) return (!!boot_cpu_has(X86_FEATURE_XMM4_2)); 
-#else - return (B_FALSE); -#endif #elif !defined(_KERNEL) return (__cpuid_has_sse4_2()); #endif @@ -431,11 +627,7 @@ zfs_avx_available(void) { boolean_t has_avx; #if defined(_KERNEL) -#if defined(KERNEL_EXPORTS_X86_FPU) has_avx = !!boot_cpu_has(X86_FEATURE_AVX); -#else - has_avx = B_FALSE; -#endif #elif !defined(_KERNEL) has_avx = __cpuid_has_avx(); #endif @@ -451,11 +643,7 @@ zfs_avx2_available(void) { boolean_t has_avx2; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX2) && defined(KERNEL_EXPORTS_X86_FPU) has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2); -#else - has_avx2 = B_FALSE; -#endif #elif !defined(_KERNEL) has_avx2 = __cpuid_has_avx2(); #endif @@ -470,7 +658,7 @@ static inline boolean_t zfs_bmi1_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_BMI1) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_BMI1) return (!!boot_cpu_has(X86_FEATURE_BMI1)); #else return (B_FALSE); @@ -487,7 +675,7 @@ static inline boolean_t zfs_bmi2_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_BMI2) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_BMI2) return (!!boot_cpu_has(X86_FEATURE_BMI2)); #else return (B_FALSE); @@ -504,7 +692,7 @@ static inline boolean_t zfs_aes_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_AES) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AES) return (!!boot_cpu_has(X86_FEATURE_AES)); #else return (B_FALSE); @@ -521,7 +709,7 @@ static inline boolean_t zfs_pclmulqdq_available(void) { #if defined(_KERNEL) -#if defined(X86_FEATURE_PCLMULQDQ) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_PCLMULQDQ) return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ)); #else return (B_FALSE); @@ -555,7 +743,7 @@ zfs_avx512f_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512F) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512F) has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F); #else has_avx512 = B_FALSE; @@ -574,7 +762,7 @@ zfs_avx512cd_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512CD) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512CD) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512CD); #else @@ -594,7 +782,7 @@ zfs_avx512er_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512ER) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512ER) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512ER); #else @@ -614,7 +802,7 @@ zfs_avx512pf_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512PF) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512PF) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512PF); #else @@ -634,7 +822,7 @@ zfs_avx512bw_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512BW) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512BW) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512BW); #else @@ -654,7 +842,7 @@ zfs_avx512dq_available(void) boolean_t has_avx512 = B_FALSE; #if defined(_KERNEL) -#if defined(X86_FEATURE_AVX512DQ) && defined(KERNEL_EXPORTS_X86_FPU) +#if defined(X86_FEATURE_AVX512DQ) has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512DQ); #else @@ -674,7 +862,7 @@ zfs_avx512vl_available(void) boolean_t has_avx512 = 
B_FALSE;

#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512VL) && defined(KERNEL_EXPORTS_X86_FPU)
+#if defined(X86_FEATURE_AVX512VL)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512VL);
#else
@@ -694,7 +882,7 @@ zfs_avx512ifma_available(void)
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512IFMA) && defined(KERNEL_EXPORTS_X86_FPU)
+#if defined(X86_FEATURE_AVX512IFMA)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512IFMA);
#else
@@ -714,7 +902,7 @@ zfs_avx512vbmi_available(void)
	boolean_t has_avx512 = B_FALSE;

#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512VBMI) && defined(KERNEL_EXPORTS_X86_FPU)
+#if defined(X86_FEATURE_AVX512VBMI)
	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
	    boot_cpu_has(X86_FEATURE_AVX512VBMI);
#else
diff --git a/include/linux/vfs_compat.h b/include/linux/vfs_compat.h
index 04a2c2b879fe..28b454133c6e 100644
--- a/include/linux/vfs_compat.h
+++ b/include/linux/vfs_compat.h
@@ -36,7 +36,7 @@
 * 2.6.28 API change,
 * Added insert_inode_locked() helper function, prior to this most callers
 * used insert_inode_hash(). The older method doesn't check for collisions
- * in the inode_hashtable but it still acceptible for use.
+ * in the inode_hashtable but it is still acceptable for use.
 */
#ifndef HAVE_INSERT_INODE_LOCKED
static inline int
diff --git a/include/spl/sys/condvar.h b/include/spl/sys/condvar.h
index 28caea57181c..f1438c4e2455 100644
--- a/include/spl/sys/condvar.h
+++ b/include/spl/sys/condvar.h
@@ -54,7 +54,8 @@ extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *);
extern void __cv_destroy(kcondvar_t *);
extern void __cv_wait(kcondvar_t *, kmutex_t *);
extern void __cv_wait_io(kcondvar_t *, kmutex_t *);
-extern void __cv_wait_sig(kcondvar_t *, kmutex_t *);
+extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *);
+extern int __cv_wait_sig(kcondvar_t *, kmutex_t *);
extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
@@ -69,6 +70,7 @@ extern void __cv_broadcast(kcondvar_t *c);
#define	cv_destroy(cvp)			__cv_destroy(cvp)
#define	cv_wait(cvp, mp)		__cv_wait(cvp, mp)
#define	cv_wait_io(cvp, mp)		__cv_wait_io(cvp, mp)
+#define	cv_wait_io_sig(cvp, mp)		__cv_wait_io_sig(cvp, mp)
#define	cv_wait_sig(cvp, mp)		__cv_wait_sig(cvp, mp)
#define	cv_wait_interruptible(cvp, mp)	cv_wait_sig(cvp, mp)
#define	cv_timedwait(cvp, mp, t)	__cv_timedwait(cvp, mp, t)
diff --git a/include/spl/sys/debug.h b/include/spl/sys/debug.h
index b17d77d280a9..ecda6bcb8959 100644
--- a/include/spl/sys/debug.h
+++ b/include/spl/sys/debug.h
@@ -102,7 +102,7 @@ void spl_dumpstack(void);
	if (!(_verify3_left OP _verify3_right))			\
		spl_panic(__FILE__, __FUNCTION__, __LINE__,	\
		    "VERIFY3(" #LEFT " " #OP " " #RIGHT ") "	\
-		    "failed (%px" #OP " %px)\n",		\
+		    "failed (%px " #OP " %px)\n",		\
		    (void *) (_verify3_left),			\
		    (void *) (_verify3_right));			\
	} while (0)
diff --git a/include/spl/sys/kmem_cache.h b/include/spl/sys/kmem_cache.h
index 8fa14f67e73f..8381b03d8ec2 100644
--- a/include/spl/sys/kmem_cache.h
+++ b/include/spl/sys/kmem_cache.h
@@ -30,12 +30,12 @@
/*
 * Slab allocation interfaces. The SPL slab differs from the standard
 * Linux SLAB or SLUB primarily in that each cache may be backed by slabs
- * allocated from the physical or virtal memory address space. The virtual
+ * allocated from the physical or virtual memory address space. The virtual
 * slabs allow for good behavior when allocating large objects of identical
 * size. This slab implementation also supports both constructors and
 * destructors which the Linux slab does not.
 */
-enum {
+typedef enum kmc_bit {
	KMC_BIT_NOTOUCH		= 0,	/* Don't update ages */
	KMC_BIT_NODEBUG		= 1,	/* Default behavior */
	KMC_BIT_NOMAGAZINE	= 2,	/* XXX: Unsupported */
@@ -45,7 +45,6 @@ enum {
	KMC_BIT_VMEM		= 6,	/* Use vmem cache */
	KMC_BIT_SLAB		= 7,	/* Use Linux slab cache */
	KMC_BIT_OFFSLAB		= 8,	/* Objects not on slab */
-	KMC_BIT_NOEMERGENCY	= 9,	/* Disable emergency objects */
	KMC_BIT_DEADLOCKED	= 14,	/* Deadlock detected */
	KMC_BIT_GROWING		= 15,	/* Growing in progress */
	KMC_BIT_REAPING		= 16,	/* Reaping in progress */
@@ -53,7 +52,7 @@ enum {
	KMC_BIT_TOTAL		= 18,	/* Proc handler helper bit */
	KMC_BIT_ALLOC		= 19,	/* Proc handler helper bit */
	KMC_BIT_MAX		= 20,	/* Proc handler helper bit */
-};
+} kmc_bit_t;

/* kmem move callback return values */
typedef enum kmem_cbrc {
@@ -73,7 +72,6 @@ typedef enum kmem_cbrc {
#define	KMC_VMEM	(1 << KMC_BIT_VMEM)
#define	KMC_SLAB	(1 << KMC_BIT_SLAB)
#define	KMC_OFFSLAB	(1 << KMC_BIT_OFFSLAB)
-#define	KMC_NOEMERGENCY	(1 << KMC_BIT_NOEMERGENCY)
#define	KMC_DEADLOCKED	(1 << KMC_BIT_DEADLOCKED)
#define	KMC_GROWING	(1 << KMC_BIT_GROWING)
#define	KMC_REAPING	(1 << KMC_BIT_REAPING)
diff --git a/include/spl/sys/mutex.h b/include/spl/sys/mutex.h
index ed0cd4932cfa..73da23685590 100644
--- a/include/spl/sys/mutex.h
+++ b/include/spl/sys/mutex.h
@@ -127,6 +127,8 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp)		\
})
/* END CSTYLED */

+#define	NESTED_SINGLE 1
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define	mutex_enter_nested(mp, subclass)	\
{						\
@@ -179,7 +181,4 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp)	\
	/* NOTE: do not dereference mp after this point */	\
}

-int spl_mutex_init(void);
-void spl_mutex_fini(void);
-
#endif /* _SPL_MUTEX_H */
diff --git a/include/spl/sys/rwlock.h b/include/spl/sys/rwlock.h
index 408defac20d3..60f5bfd986b4 100644
--- a/include/spl/sys/rwlock.h
+++ b/include/spl/sys/rwlock.h
@@ -29,43 +29,6 @@
#include <sys/types.h>
#include <linux/rwsem.h>

-/* Linux kernel compatibility */
-#if defined(CONFIG_PREEMPT_RT_FULL)
-#define	SPL_RWSEM_SINGLE_READER_VALUE	(1)
-#define	SPL_RWSEM_SINGLE_WRITER_VALUE	(0)
-#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
-#define	SPL_RWSEM_SINGLE_READER_VALUE	(1)
-#define	SPL_RWSEM_SINGLE_WRITER_VALUE	(-1)
-#elif defined(RWSEM_ACTIVE_MASK)
-#define	SPL_RWSEM_SINGLE_READER_VALUE	(RWSEM_ACTIVE_READ_BIAS)
-#define	SPL_RWSEM_SINGLE_WRITER_VALUE	(RWSEM_ACTIVE_WRITE_BIAS)
-#endif
-
-/* Linux 3.16 changed activity to count for rwsem-spinlock */
-#if defined(CONFIG_PREEMPT_RT_FULL)
-#define	RWSEM_COUNT(sem)	sem->read_depth
-#elif defined(HAVE_RWSEM_ACTIVITY)
-#define	RWSEM_COUNT(sem)	sem->activity
-/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */
-#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)
-#define	RWSEM_COUNT(sem)	atomic_long_read(&(sem)->count)
-#else
-#define	RWSEM_COUNT(sem)	sem->count
-#endif
-
-#if defined(RWSEM_SPINLOCK_IS_RAW)
-#define	spl_rwsem_lock_irqsave(lk, fl)	raw_spin_lock_irqsave(lk, fl)
-#define	spl_rwsem_unlock_irqrestore(lk, fl)	\
-	raw_spin_unlock_irqrestore(lk, fl)
-#define	spl_rwsem_trylock_irqsave(lk, fl)	raw_spin_trylock_irqsave(lk, fl)
-#else
-#define	spl_rwsem_lock_irqsave(lk, fl)	spin_lock_irqsave(lk, fl)
-#define	spl_rwsem_unlock_irqrestore(lk, fl)	spin_unlock_irqrestore(lk, fl)
-#define	spl_rwsem_trylock_irqsave(lk, fl)	spin_trylock_irqsave(lk, fl)
-#endif /* RWSEM_SPINLOCK_IS_RAW */
-
-#define
spl_rwsem_is_locked(rwsem) rwsem_is_locked(rwsem) - typedef enum { RW_DRIVER = 2, RW_DEFAULT = 4, @@ -78,15 +41,9 @@ typedef enum { RW_READER = 2 } krw_t; -/* - * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner - * field, so we don't need our own. - */ typedef struct { struct rw_semaphore rw_rwlock; -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER kthread_t *rw_owner; -#endif #ifdef CONFIG_LOCKDEP krw_type_t rw_type; #endif /* CONFIG_LOCKDEP */ @@ -97,31 +54,19 @@ typedef struct { static inline void spl_rw_set_owner(krwlock_t *rwp) { -/* - * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write, - * downgrade_write and __init_rwsem will set/clear owner for us. - */ -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER rwp->rw_owner = current; -#endif } static inline void spl_rw_clear_owner(krwlock_t *rwp) { -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER rwp->rw_owner = NULL; -#endif } static inline kthread_t * rw_owner(krwlock_t *rwp) { -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - return (SEM(rwp)->owner); -#else return (rwp->rw_owner); -#endif } #ifdef CONFIG_LOCKDEP @@ -148,62 +93,22 @@ spl_rw_lockdep_on_maybe(krwlock_t *rwp) \ #define spl_rw_lockdep_on_maybe(rwp) #endif /* CONFIG_LOCKDEP */ - static inline int -RW_WRITE_HELD(krwlock_t *rwp) +RW_LOCK_HELD(krwlock_t *rwp) { - return (rw_owner(rwp) == current); + return (rwsem_is_locked(SEM(rwp))); } static inline int -RW_LOCK_HELD(krwlock_t *rwp) +RW_WRITE_HELD(krwlock_t *rwp) { - return (spl_rwsem_is_locked(SEM(rwp))); + return (rw_owner(rwp) == current); } static inline int RW_READ_HELD(krwlock_t *rwp) { - if (!RW_LOCK_HELD(rwp)) - return (0); - - /* - * rw_semaphore cheat sheet: - * - * < 3.16: - * There's no rw_semaphore.owner, so use rwp.owner instead. - * If rwp.owner == NULL then it's a reader - * - * 3.16 - 4.7: - * rw_semaphore.owner added (https://lwn.net/Articles/596656/) - * and CONFIG_RWSEM_SPIN_ON_OWNER introduced. - * If rw_semaphore.owner == NULL then it's a reader - * - * 4.8 - 4.16.16: - * RWSEM_READER_OWNED added as an internal #define. - * (https://lore.kernel.org/patchwork/patch/678590/) - * If rw_semaphore.owner == 1 then it's a reader - * - * 4.16.17 - 4.19: - * RWSEM_OWNER_UNKNOWN introduced as ((struct task_struct *)-1L) - * (https://do-db2.lkml.org/lkml/2018/5/15/985) - * If rw_semaphore.owner == 1 then it's a reader. - * - * 4.20+: - * RWSEM_OWNER_UNKNOWN changed to ((struct task_struct *)-2L) - * (https://lkml.org/lkml/2018/9/6/986) - * If rw_semaphore.owner & 1 then it's a reader, and also the reader's - * task_struct may be embedded in rw_semaphore->owner. - */ -#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && defined(RWSEM_OWNER_UNKNOWN) - if (RWSEM_OWNER_UNKNOWN == (struct task_struct *)-2L) { - /* 4.20+ kernels with CONFIG_RWSEM_SPIN_ON_OWNER */ - return ((unsigned long) SEM(rwp)->owner & 1); - } -#endif - - /* < 4.20 kernel or !CONFIG_RWSEM_SPIN_ON_OWNER */ - return (rw_owner(rwp) == NULL || (unsigned long) rw_owner(rwp) == 1); + return (RW_LOCK_HELD(rwp) && rw_owner(rwp) == NULL); } /* @@ -228,6 +133,12 @@ RW_READ_HELD(krwlock_t *rwp) */ #define rw_destroy(rwp) ((void) 0) +/* + * Upgrading a rwsem from a reader to a writer is not supported by the + * Linux kernel. The lock must be dropped and reacquired as a writer. 
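+ * A caller that actually needs the upgrade therefore falls back to a
+ * drop-and-reacquire sequence, along these lines (an illustrative
+ * sketch, not code from this patch):
+ *
+ *	if (!rw_tryupgrade(&lock)) {
+ *		rw_exit(&lock);
+ *		rw_enter(&lock, RW_WRITER);
+ *		(re-validate any state observed under the read lock)
+ *	}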
+ */ +#define rw_tryupgrade(rwp) RW_WRITE_HELD(rwp) + #define rw_tryenter(rwp, rw) \ ({ \ int _rc_ = 0; \ @@ -285,25 +196,6 @@ RW_READ_HELD(krwlock_t *rwp) downgrade_write(SEM(rwp)); \ spl_rw_lockdep_on_maybe(rwp); \ }) - -#define rw_tryupgrade(rwp) \ -({ \ - int _rc_ = 0; \ - \ - if (RW_WRITE_HELD(rwp)) { \ - _rc_ = 1; \ - } else { \ - spl_rw_lockdep_off_maybe(rwp); \ - if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \ - spl_rw_set_owner(rwp); \ - spl_rw_lockdep_on_maybe(rwp); \ - } \ - _rc_; \ -}) /* END CSTYLED */ -int spl_rw_init(void); -void spl_rw_fini(void); -int rwsem_tryupgrade(struct rw_semaphore *rwsem); - #endif /* _SPL_RWLOCK_H */ diff --git a/include/spl/sys/strings.h b/include/spl/sys/strings.h index 4fb80320635c..8b810c9af24f 100644 --- a/include/spl/sys/strings.h +++ b/include/spl/sys/strings.h @@ -28,4 +28,8 @@ #define bcopy(src, dest, size) memmove(dest, src, size) #define bcmp(src, dest, size) memcmp((src), (dest), (size_t)(size)) +#ifndef HAVE_KSTRTOUL +#define kstrtoul strict_strtoul +#endif + #endif /* _SPL_SYS_STRINGS_H */ diff --git a/include/spl/sys/vnode.h b/include/spl/sys/vnode.h index 71278b08c867..7bd278e4e13b 100644 --- a/include/spl/sys/vnode.h +++ b/include/spl/sys/vnode.h @@ -182,7 +182,6 @@ extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, extern file_t *vn_getf(int fd); extern void vn_releasef(int fd); extern void vn_areleasef(int fd, uf_info_t *fip); -extern int vn_set_pwd(const char *filename); int spl_vn_init(void); void spl_vn_fini(void); diff --git a/include/sys/abd.h b/include/sys/abd.h index 3d9fdbf102aa..b781be4da700 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2014 by Chunwei Chen. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright (c) 2016, 2019 by Delphix. All rights reserved. */ #ifndef _ABD_H @@ -44,7 +44,8 @@ typedef enum abd_flags { ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */ ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */ ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */ - ABD_FLAG_MULTI_CHUNK = 1 << 4 /* pages split over multiple chunks */ + ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */ + ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */ } abd_flags_t; typedef struct abd { @@ -60,6 +61,7 @@ typedef struct abd { } abd_scatter; struct abd_linear { void *abd_buf; + struct scatterlist *abd_sgl; /* for LINEAR_PAGE */ } abd_linear; } abd_u; } abd_t; @@ -75,6 +77,13 @@ abd_is_linear(abd_t *abd) return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE); } +static inline boolean_t +abd_is_linear_page(abd_t *abd) +{ + return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ? + B_TRUE : B_FALSE); +} + /* * Allocations and deallocations */ diff --git a/include/sys/arc.h b/include/sys/arc.h index dc2fd03647f3..d7bb44b02001 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -187,7 +187,7 @@ typedef enum arc_buf_contents { } arc_buf_contents_t; /* - * The following breakdows of arc_size exist for kstat only. + * The following breakdowns of arc_size exist for kstat only. 
*/ typedef enum arc_space_type { ARC_SPACE_DATA, diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index cd42c0c01a20..c8f551db731d 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -39,7 +39,7 @@ extern "C" { * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) * ARC_mru - recently used, currently cached - * ARC_mru_ghost - recentely used, no longer in cache + * ARC_mru_ghost - recently used, no longer in cache * ARC_mfu - frequently used, currently cached * ARC_mfu_ghost - frequently used, no longer in cache * ARC_l2c_only - exists in L2ARC but not other states diff --git a/include/sys/avl.h b/include/sys/avl.h index 206b539fab54..962e8b1cfb6f 100644 --- a/include/sys/avl.h +++ b/include/sys/avl.h @@ -97,7 +97,7 @@ extern "C" { * * 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes. * Note that once you use avl_destroy_nodes(), you can no longer - * use any routine except avl_destroy_nodes() and avl_destoy(). + * use any routine except avl_destroy_nodes() and avl_destroy(). * * 4. Use avl_destroy() to destroy the AVL tree itself. * @@ -144,7 +144,7 @@ typedef uintptr_t avl_index_t; * user data structure which must contain a field of type avl_node_t. * * Also assume the user data structures looks like: - * stuct my_type { + * struct my_type { * ... * avl_node_t my_link; * ... diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 88c83617178d..dd8d12376cc5 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -465,7 +465,7 @@ int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, /* * Set the data blocksize for an object. * - * The object cannot have any blocks allcated beyond the first. If + * The object cannot have any blocks allocated beyond the first. If * the first block is allocated already, the new size must be greater * than the current block size. If these conditions are not met, * ENOTSUP will be returned. diff --git a/include/sys/dnode.h b/include/sys/dnode.h index c60258bbc768..e97e40373b4d 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -46,6 +46,7 @@ extern "C" { */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 +#define DNODE_DRY_RUN 4 /* * dnode_next_offset() flags. @@ -415,6 +416,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots, boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref); void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); +int dnode_try_claim(objset_t *os, uint64_t object, int slots); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, @@ -532,11 +534,6 @@ typedef struct dnode_stats { * a range of dnode slots which would overflow the dnode_phys_t. */ kstat_named_t dnode_hold_free_overflow; - /* - * Number of times a dnode_hold(...) was attempted on a dnode - * which had already been unlinked in an earlier txg. - */ - kstat_named_t dnode_hold_free_txg; /* * Number of times dnode_free_interior_slots() needed to retry * acquiring a slot zrl lock due to contention. 
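The DNODE_DRY_RUN flag and the dnode_try_claim() prototype added above let a
caller probe whether an object number could be claimed without actually
holding it. A sketch of the intended call site, assuming the usual
0-on-success convention (the wrapper name is illustrative, not from this
patch):

	/*
	 * Check whether 'slots' consecutive dnode slots starting at
	 * 'object' could be claimed, without taking a hold on them.
	 */
	static boolean_t
	object_is_claimable(objset_t *os, uint64_t object, int slots)
	{
		return (dnode_try_claim(os, object, slots) == 0);
	}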
diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h index 3cdad7441407..ea7d70cf3232 100644 --- a/include/sys/dsl_bookmark.h +++ b/include/sys/dsl_bookmark.h @@ -37,9 +37,11 @@ typedef struct zfs_bookmark_phys { uint64_t zbm_creation_txg; /* birth transaction group */ uint64_t zbm_creation_time; /* bookmark creation time */ - /* the following fields are reserved for redacted send / recv */ + /* fields used for redacted send / recv */ uint64_t zbm_redaction_obj; /* redaction list object */ uint64_t zbm_flags; /* ZBM_FLAG_* */ + + /* fields used for bookmark written size */ uint64_t zbm_referenced_bytes_refd; uint64_t zbm_compressed_bytes_refd; uint64_t zbm_uncompressed_bytes_refd; diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index c2c0a548a488..0f73ea6c6df8 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -209,7 +209,6 @@ void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx); -int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd); uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index 345d2754fb65..032f7f3e2d11 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. */ #ifndef _SYS_DSL_SCAN_H @@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); int dsl_scan_cancel(struct dsl_pool *); int dsl_scan(struct dsl_pool *, pool_scan_func_t); +void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd); boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd); -void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); +void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg); boolean_t dsl_scan_resilvering(struct dsl_pool *dp); +boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_entry_t *dde, dmu_tx_t *tx); diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h index da6c7a40daca..957963ffe553 100644 --- a/include/sys/dsl_synctask.h +++ b/include/sys/dsl_synctask.h @@ -37,6 +37,7 @@ struct dsl_pool; typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *); typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *); +typedef void (dsl_sigfunc_t)(void *, dmu_tx_t *); typedef enum zfs_space_check { /* @@ -116,6 +117,8 @@ int dsl_early_sync_task(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *, void *, int, zfs_space_check_t); void dsl_early_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *, void *, int, zfs_space_check_t, dmu_tx_t *); +int dsl_sync_task_sig(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *, + dsl_sigfunc_t *, void *, int, zfs_space_check_t); #ifdef __cplusplus } diff --git a/include/sys/efi_partition.h b/include/sys/efi_partition.h index 684b3e588a16..88bdfd2b1ca3 100644 --- a/include/sys/efi_partition.h +++ b/include/sys/efi_partition.h @@ -297,11 +297,11 
@@ typedef struct efi_gpe { * checksums, and perform any necessary byte-swapping to the on-disk * format. */ -/* Solaris library abstraction for EFI partitons */ +/* Solaris library abstraction for EFI partitions */ typedef struct dk_part { diskaddr_t p_start; /* starting LBA */ diskaddr_t p_size; /* size in blocks */ - struct uuid p_guid; /* partion type GUID */ + struct uuid p_guid; /* partition type GUID */ ushort_t p_tag; /* converted to part'n type GUID */ ushort_t p_flag; /* attributes */ char p_name[EFI_PART_NAME_LEN]; /* partition name */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index fde42844447e..459e1a1eee3b 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -964,7 +964,7 @@ typedef struct pool_scan_stat { /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ - uint64_t pss_pass_scrub_pause; /* pause time of a scurb pass */ + uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */ /* cumulative time scrub spent paused, needed for rate calculation */ uint64_t pss_pass_scrub_spent_paused; uint64_t pss_pass_issued; /* issued bytes per scan pass */ @@ -1047,7 +1047,7 @@ typedef struct vdev_stat { uint64_t vs_fragmentation; /* device fragmentation */ uint64_t vs_initialize_bytes_done; /* bytes initialized */ uint64_t vs_initialize_bytes_est; /* total bytes to initialize */ - uint64_t vs_initialize_state; /* vdev_initialzing_state_t */ + uint64_t vs_initialize_state; /* vdev_initializing_state_t */ uint64_t vs_initialize_action_time; /* time_t */ uint64_t vs_checkpoint_space; /* checkpoint-consumed space */ uint64_t vs_resilver_deferred; /* resilver deferred */ @@ -1337,6 +1337,7 @@ typedef enum { ZFS_ERR_FROM_IVSET_GUID_MISSING, ZFS_ERR_FROM_IVSET_GUID_MISMATCH, ZFS_ERR_SPILL_BLOCK_FLAG_MISSING, + ZFS_ERR_EXPORT_IN_PROGRESS, } zfs_errno_t; /* diff --git a/include/sys/lua/luaconf.h b/include/sys/lua/luaconf.h index 302c57a8c4b3..fa7861336fc0 100644 --- a/include/sys/lua/luaconf.h +++ b/include/sys/lua/luaconf.h @@ -495,7 +495,7 @@ extern int64_t lcompat_pow(int64_t, int64_t); ** a single double value, using NaN values to represent non-number ** values. The trick only works on 32-bit machines (ints and pointers ** are 32-bit values) with numbers represented as IEEE 754-2008 doubles -** with conventional endianess (12345678 or 87654321), in CPUs that do +** with conventional endianness (12345678 or 87654321), in CPUs that do ** not produce signaling NaN values (all NaNs are quiet). 
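**
** An illustrative sketch of the packing (editorial, not text from the
** original header; QNAN_TAG stands for whatever quiet-NaN bit pattern
** the port reserves as its type tag):
**
**	union { double d; uint32_t u[2]; } box;
**	box.u[1] = QNAN_TAG;	(tag word: sign, exponent, quiet bits)
**	box.u[0] = v;		(the 32-bit payload value)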
*/ diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 2790d06c71d2..330902529664 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -50,6 +50,7 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t, void metaslab_fini(metaslab_t *); int metaslab_load(metaslab_t *); +void metaslab_potentially_unload(metaslab_t *, uint64_t); void metaslab_unload(metaslab_t *); uint64_t metaslab_allocated_space(metaslab_t *); diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 439540685971..0c7b4075d9a3 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -89,6 +89,8 @@ void multilist_sublist_insert_head(multilist_sublist_t *, void *); void multilist_sublist_insert_tail(multilist_sublist_t *, void *); void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj); void multilist_sublist_remove(multilist_sublist_t *, void *); +int multilist_sublist_is_empty(multilist_sublist_t *); +int multilist_sublist_is_empty_idx(multilist_t *, unsigned int); void *multilist_sublist_head(multilist_sublist_t *); void *multilist_sublist_tail(multilist_sublist_t *); diff --git a/include/sys/pathname.h b/include/sys/pathname.h index 5db69b1784c9..d79cc5c01afd 100644 --- a/include/sys/pathname.h +++ b/include/sys/pathname.h @@ -54,8 +54,10 @@ extern "C" { */ typedef struct pathname { char *pn_buf; /* underlying storage */ +#if 0 /* unused in ZoL */ char *pn_path; /* remaining pathname */ size_t pn_pathlen; /* remaining length */ +#endif size_t pn_bufsize; /* total size of pn_buf */ } pathname_t; diff --git a/include/sys/refcount.h b/include/sys/refcount.h index e982faeba0f2..c8f586230392 100644 --- a/include/sys/refcount.h +++ b/include/sys/refcount.h @@ -44,7 +44,7 @@ extern "C" { #ifdef ZFS_DEBUG typedef struct reference { list_node_t ref_link; - void *ref_holder; + const void *ref_holder; uint64_t ref_number; uint8_t *ref_removed; } reference_t; @@ -70,16 +70,17 @@ void zfs_refcount_destroy(zfs_refcount_t *); void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t); int zfs_refcount_is_zero(zfs_refcount_t *); int64_t zfs_refcount_count(zfs_refcount_t *); -int64_t zfs_refcount_add(zfs_refcount_t *, void *); -int64_t zfs_refcount_remove(zfs_refcount_t *, void *); -int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *); -int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *); +int64_t zfs_refcount_add(zfs_refcount_t *, const void *); +int64_t zfs_refcount_remove(zfs_refcount_t *, const void *); +int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *); +int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *); void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *); -void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *); +void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *, + const void *); void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t, - void *, void *); -boolean_t zfs_refcount_held(zfs_refcount_t *, void *); -boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *); + const void *, const void *); +boolean_t zfs_refcount_held(zfs_refcount_t *, const void *); +boolean_t zfs_refcount_not_held(zfs_refcount_t *, const void *); void zfs_refcount_init(void); void zfs_refcount_fini(void); diff --git a/include/sys/sa.h b/include/sys/sa.h index 50b90622164b..432e0bc415c9 100644 --- a/include/sys/sa.h +++ b/include/sys/sa.h @@ -51,7 +51,7 @@ typedef uint16_t sa_attr_type_t; typedef struct sa_attr_reg { char *sa_name; /* 
attribute name */ uint16_t sa_length; - sa_bswap_type_t sa_byteswap; /* bswap functon enum */ + sa_bswap_type_t sa_byteswap; /* bswap function enum */ sa_attr_type_t sa_attr; /* filled in during registration */ } sa_attr_reg_t; diff --git a/include/sys/spa.h b/include/sys/spa.h index 23434edbc72e..4c1dcdcc1336 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -26,7 +26,7 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -401,8 +401,8 @@ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { BP_EMBEDDED_TYPE_DATA, - BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ - NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED + BP_EMBEDDED_TYPE_RESERVED, /* Reserved for Delphix byteswap feature. */ + NUM_BP_EMBEDDED_TYPES } bp_embedded_type_t; #define BPE_NUM_WORDS 14 @@ -777,6 +777,7 @@ extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); extern void spa_async_suspend(spa_t *spa); extern void spa_async_resume(spa_t *spa); +extern int spa_async_tasks(spa_t *spa); extern spa_t *spa_inject_addref(char *pool); extern void spa_inject_delref(spa_t *spa); extern void spa_scan_stat_init(spa_t *spa); @@ -976,8 +977,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid, /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); -extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw); -extern void spa_config_exit(spa_t *spa, int locks, void *tag); +extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw); +extern void spa_config_exit(spa_t *spa, int locks, const void *tag); extern int spa_config_held(spa_t *spa, int locks, krw_t rw); /* Pool vdev add/remove lock */ @@ -1091,7 +1092,6 @@ extern boolean_t spa_has_checkpoint(spa_t *spa); extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa); extern boolean_t spa_suspend_async_destroy(spa_t *spa); extern uint64_t spa_min_claim_txg(spa_t *spa); -extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp); extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp); typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size, @@ -1104,7 +1104,7 @@ extern uint64_t spa_missing_tvds_allowed(spa_t *spa); extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing); extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa); extern boolean_t spa_multihost(spa_t *spa); -extern unsigned long spa_get_hostid(void); +extern uint32_t spa_get_hostid(spa_t *spa); extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); extern int spa_mode(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index f0e901dfb65c..71bf5f69ab88 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -220,6 +220,7 @@ struct spa { spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; dsl_pool_t *spa_dsl_pool; boolean_t spa_is_initializing; /* true while opening pool */ + boolean_t spa_is_exporting; /* true while exporting pool */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ metaslab_class_t *spa_special_class; /* special allocation class */ @@ -270,7 +271,9 @@ struct spa { boolean_t spa_extreme_rewind; /* rewind past 
deferred frees */ kmutex_t spa_scrub_lock; /* resilver/scrub lock */ uint64_t spa_scrub_inflight; /* in-flight scrub bytes */ - uint64_t spa_load_verify_ios; /* in-flight verification IOs */ + + /* in-flight verification bytes */ + uint64_t spa_load_verify_bytes; kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */ uint8_t spa_scrub_active; /* active or suspended? */ uint8_t spa_scrub_type; /* type of scrub we're doing */ @@ -398,6 +401,7 @@ struct spa { mmp_thread_t spa_mmp; /* multihost mmp thread */ list_t spa_leaf_list; /* list of leaf vdevs */ uint64_t spa_leaf_list_gen; /* track leaf_list changes */ + uint32_t spa_hostid; /* cached system hostid */ /* * spa_refcount & spa_config_lock must be the last elements diff --git a/include/sys/txg.h b/include/sys/txg.h index 760d5208bf4a..260a3b43cfe8 100644 --- a/include/sys/txg.h +++ b/include/sys/txg.h @@ -87,6 +87,11 @@ extern void txg_kick(struct dsl_pool *dp); */ extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg); +/* + * Wait as above. Returns true if the thread was signaled while waiting. + */ +extern boolean_t txg_wait_synced_sig(struct dsl_pool *dp, uint64_t txg); + /* * Wait until the given transaction group, or one after it, is * the open transaction group. Try to make this happen as soon diff --git a/include/sys/txg_impl.h b/include/sys/txg_impl.h index 4e05214919d7..047d51b94c66 100644 --- a/include/sys/txg_impl.h +++ b/include/sys/txg_impl.h @@ -43,7 +43,7 @@ extern "C" { * the number of active transaction holds (tc_count). As transactions * are assigned into a transaction group the appropriate tc_count is * incremented to indicate that there are pending changes that have yet - * to quiesce. Consumers evenutally call txg_rele_to_sync() to decrement + * to quiesce. Consumers eventually call txg_rele_to_sync() to decrement * the tc_count. A transaction group is not considered quiesced until all * tx_cpu structures have reached a tc_count of zero. * @@ -78,7 +78,7 @@ struct tx_cpu { /* * The tx_state structure maintains the state information about the different - * stages of the pool's transcation groups. A per pool tx_state structure + * stages of the pool's transaction groups. A per pool tx_state structure * is used to track this information. The tx_state structure also points to * an array of tx_cpu structures (described above). Although the tx_sync_lock * is used to protect the members of this structure, it is not used to diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 67ca0d116147..339a48861c19 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. 
*/ #ifndef _SYS_VDEV_H @@ -151,7 +152,8 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg); extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); -extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd); +extern void vdev_defer_resilver(vdev_t *vd); +extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx); typedef enum vdev_config_flag { VDEV_CONFIG_SPARE = 1 << 0, diff --git a/include/sys/vdev_raidz.h b/include/sys/vdev_raidz.h index 83c55c989b5a..845939329b6b 100644 --- a/include/sys/vdev_raidz.h +++ b/include/sys/vdev_raidz.h @@ -52,7 +52,7 @@ int vdev_raidz_reconstruct(struct raidz_map *, const int *, int); */ void vdev_raidz_math_init(void); void vdev_raidz_math_fini(void); -struct raidz_impl_ops *vdev_raidz_math_get_ops(void); +const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); int vdev_raidz_math_generate(struct raidz_map *); int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *, const int); diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h index 603050db6450..bcde2adb3867 100644 --- a/include/sys/vdev_raidz_impl.h +++ b/include/sys/vdev_raidz_impl.h @@ -117,7 +117,7 @@ typedef struct raidz_map { uintptr_t rm_reports; /* # of referencing checksum reports */ uint8_t rm_freed; /* map no longer has referencing ZIO */ uint8_t rm_ecksuminjected; /* checksum error was injected */ - raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ + const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ } raidz_map_t; #endif @@ -186,7 +186,7 @@ extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl; * raidz_parity Returns parity of the RAIDZ block * raidz_ncols Returns number of columns the block spans * Note, all rows have the same number of columns. - * raidz_nbigcols Returns number of big columns columns + * raidz_nbigcols Returns number of big columns * raidz_col_p Returns pointer to a column * raidz_col_size Returns size of a column * raidz_big_size Returns size of big columns diff --git a/include/sys/vdev_removal.h b/include/sys/vdev_removal.h index 3962237afdab..e3bab0658d62 100644 --- a/include/sys/vdev_removal.h +++ b/include/sys/vdev_removal.h @@ -14,7 +14,7 @@ */ /* - * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + * Copyright (c) 2014, 2019 by Delphix. All rights reserved. */ #ifndef _SYS_VDEV_REMOVAL_H @@ -81,13 +81,13 @@ extern void spa_vdev_condense_suspend(spa_t *); extern int spa_vdev_remove(spa_t *, uint64_t, boolean_t); extern void free_from_removing_vdev(vdev_t *, uint64_t, uint64_t); extern int spa_removal_get_stats(spa_t *, pool_removal_stat_t *); -extern void svr_sync(spa_t *spa, dmu_tx_t *tx); +extern void svr_sync(spa_t *, dmu_tx_t *); extern void spa_vdev_remove_suspend(spa_t *); extern int spa_vdev_remove_cancel(spa_t *); -extern void spa_vdev_removal_destroy(spa_vdev_removal_t *svr); +extern void spa_vdev_removal_destroy(spa_vdev_removal_t *); +extern uint64_t spa_remove_max_segment(spa_t *); extern int vdev_removal_max_span; -extern int zfs_remove_max_segment; #ifdef __cplusplus } diff --git a/include/sys/zap.h b/include/sys/zap.h index ab13652d8c07..b19b4643879c 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
* Copyright 2017 Nexenta Systems, Inc. */ @@ -350,6 +350,7 @@ typedef struct zap_cursor { uint64_t zc_serialized; uint64_t zc_hash; uint32_t zc_cd; + boolean_t zc_prefetch; } zap_cursor_t; typedef struct { @@ -375,7 +376,9 @@ typedef struct { * Initialize a zap cursor, pointing to the "first" attribute of the * zapobj. You must _fini the cursor when you are done with it. */ -void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj); +void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); +void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, + uint64_t zapobj); void zap_cursor_fini(zap_cursor_t *zc); /* diff --git a/include/sys/zcp.h b/include/sys/zcp.h index b9c8ef0069f1..5cc520da5c56 100644 --- a/include/sys/zcp.h +++ b/include/sys/zcp.h @@ -52,6 +52,12 @@ typedef struct zcp_cleanup_handler { list_node_t zch_node; } zcp_cleanup_handler_t; +typedef struct zcp_alloc_arg { + boolean_t aa_must_succeed; + int64_t aa_alloc_remaining; + int64_t aa_alloc_limit; +} zcp_alloc_arg_t; + typedef struct zcp_run_info { dsl_pool_t *zri_pool; @@ -93,6 +99,11 @@ typedef struct zcp_run_info { */ boolean_t zri_timed_out; + /* + * Channel program was canceled by user + */ + boolean_t zri_canceled; + /* * Boolean indicating whether or not we are running in syncing * context. @@ -104,6 +115,26 @@ typedef struct zcp_run_info { * triggered in the event of a fatal error. */ list_t zri_cleanup_handlers; + + /* + * The Lua state context of our channel program. + */ + lua_State *zri_state; + + /* + * Lua memory allocator arguments. + */ + zcp_alloc_arg_t *zri_allocargs; + + /* + * Contains output values from zcp script or error string. + */ + nvlist_t *zri_outnvl; + + /* + * The errno number returned to caller of zcp_eval(). + */ + int zri_result; } zcp_run_info_t; zcp_run_info_t *zcp_run_info(lua_State *); @@ -118,7 +149,7 @@ typedef struct zcp_arg { /* * The name of this argument. For keyword arguments this is the name * functions will use to set the argument. For positional arguments - * the name has no programatic meaning, but will appear in error + * the name has no programmatic meaning, but will appear in error * messages and help output. */ const char *za_name; diff --git a/include/sys/zfs_acl.h b/include/sys/zfs_acl.h index 6d3db5041608..747f4e57e2a0 100644 --- a/include/sys/zfs_acl.h +++ b/include/sys/zfs_acl.h @@ -62,7 +62,7 @@ struct znode_phys; /* * All ACEs have a common hdr. For * owner@, group@, and everyone@ this is all - * thats needed. + * that's needed. 
*/ typedef struct zfs_ace_hdr { uint16_t z_type; diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index e3fa2e61bdc9..def9de781460 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -257,6 +257,8 @@ extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); +#define NESTED_SINGLE 1 +#define mutex_enter_nested(mp, class) mutex_enter(mp) /* * RW locks */ @@ -305,6 +307,7 @@ typedef pthread_cond_t kcondvar_t; extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); +extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp); extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag); @@ -313,8 +316,8 @@ extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at) -#define cv_wait_sig(cv, mp) cv_wait(cv, mp) #define cv_wait_io(cv, mp) cv_wait(cv, mp) +#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp) #define cv_timedwait_sig_hires(cv, mp, t, r, f) \ cv_timedwait_hires(cv, mp, t, r, f) diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index 05b080843d72..5f1e2a364e48 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -66,13 +66,13 @@ typedef struct locked_range { uint8_t lr_read_wanted; /* reader wants to lock this range */ } locked_range_t; -void rangelock_init(rangelock_t *, rangelock_cb_t *, void *); -void rangelock_fini(rangelock_t *); +void zfs_rangelock_init(rangelock_t *, rangelock_cb_t *, void *); +void zfs_rangelock_fini(rangelock_t *); -locked_range_t *rangelock_enter(rangelock_t *, +locked_range_t *zfs_rangelock_enter(rangelock_t *, uint64_t, uint64_t, rangelock_type_t); -void rangelock_exit(locked_range_t *); -void rangelock_reduce(locked_range_t *, uint64_t, uint64_t); +void zfs_rangelock_exit(locked_range_t *); +void zfs_rangelock_reduce(locked_range_t *, uint64_t, uint64_t); #ifdef __cplusplus } diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index 42f534f5db62..457d027baf9b 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -46,7 +46,7 @@ struct znode; /* * This structure emulates the vfs_t from other platforms. It's purpose - * is to faciliate the handling of mount options and minimize structural + * is to facilitate the handling of mount options and minimize structural * differences between the platforms. 
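The zfs_context.h hunk above stops aliasing cv_wait_sig() to cv_wait() in userspace and declares it as a real function, so signal-interruptible waits behave the same in libzpool builds as in the kernel; cv_wait_io_sig() now maps onto it as well. A minimal sketch of the consumer pattern this enables, assuming the illumos convention that cv_wait_sig() returns 0 when the sleep is broken by a signal; wait_until_done(), lock, cv, and done are illustrative, not part of the change:

	/* Wait for a condition, but let a signal abort the wait. */
	static kmutex_t lock;
	static kcondvar_t cv;
	static boolean_t done;

	static int
	wait_until_done(void)
	{
		mutex_enter(&lock);
		while (!done) {
			if (cv_wait_sig(&cv, &lock) == 0) {
				mutex_exit(&lock);
				return (EINTR);	/* interrupted by a signal */
			}
		}
		mutex_exit(&lock);
		return (0);
	}

txg_wait_synced_sig() from the txg.h hunk earlier is the same idea one level up: it returns B_TRUE so a caller can back out of a txg_wait_synced() that the user has signaled.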
*/ typedef struct vfs { @@ -105,7 +105,7 @@ struct zfsvfs { list_t z_all_znodes; /* all znodes in the fs */ uint64_t z_nr_znodes; /* number of znodes in the fs */ unsigned long z_rollback_time; /* last online rollback time */ - unsigned long z_snap_defer_time; /* last snapshot unmount deferal */ + unsigned long z_snap_defer_time; /* last snapshot unmount deferral */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ arc_prune_t *z_arc_prune; /* called by ARC to prune caches */ struct inode *z_ctldir; /* .zfs directory inode */ diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d4a3ea769331..ced5a73867ae 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -192,10 +192,15 @@ typedef struct znode { krwlock_t z_name_lock; /* "master" lock for dirent locks */ zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ rangelock_t z_rangelock; /* file range locks */ - uint8_t z_unlinked; /* file has been unlinked */ - uint8_t z_atime_dirty; /* atime needs to be synced */ - uint8_t z_zn_prefetch; /* Prefetch znodes? */ - uint8_t z_moved; /* Has this znode been moved? */ + boolean_t z_unlinked; /* file has been unlinked */ + boolean_t z_atime_dirty; /* atime needs to be synced */ + boolean_t z_zn_prefetch; /* Prefetch znodes? */ + boolean_t z_moved; /* Has this znode been moved? */ + boolean_t z_is_sa; /* are we native sa? */ + boolean_t z_is_mapped; /* are we mmap'ed */ + boolean_t z_is_ctldir; /* are we .zfs entry */ + boolean_t z_is_stale; /* are we stale due to rollback? */ + boolean_t z_suspended; /* extra ref from a suspend? */ uint_t z_blksz; /* block size in bytes */ uint_t z_seq; /* modification sequence number */ uint64_t z_mapcnt; /* number of pages mapped to file */ @@ -212,10 +217,6 @@ typedef struct znode { uint64_t z_projid; /* project ID */ list_node_t z_link_node; /* all znodes in fs link */ sa_handle_t *z_sa_hdl; /* handle to sa data */ - boolean_t z_is_sa; /* are we native sa? */ - boolean_t z_is_mapped; /* are we mmap'ed */ - boolean_t z_is_ctldir; /* are we .zfs entry */ - boolean_t z_is_stale; /* are we stale due to rollback? */ struct inode z_inode; /* generic vfs inode */ } znode_t; @@ -371,7 +372,7 @@ extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, vattr_t *vap); extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid); + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked); #define ZFS_NO_OBJECT 0 /* no object id */ extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name); diff --git a/include/sys/zil.h b/include/sys/zil.h index fb7b38a066f4..6b038a9dd228 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -80,7 +80,7 @@ typedef struct zil_header { * Log blocks are chained together. Originally they were chained at the * end of the block. For performance reasons the chain was moved to the * beginning of the block which allows writes for only the data being used. - * The older position is supported for backwards compatability. + * The older position is supported for backwards compatibility. 
* * The zio_eck_t contains a zec_cksum which for the intent log is * the sequence number of this log block. A seq of 0 is invalid. @@ -421,7 +421,7 @@ typedef struct zil_stats { /* * Number of transactions (reads, writes, renames, etc.) - * that have been commited. + * that have been committed. */ kstat_named_t zil_itx_count; @@ -515,6 +515,9 @@ extern void zil_set_sync(zilog_t *zilog, uint64_t syncval); extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); +extern uint64_t zil_max_copied_data(zilog_t *zilog); +extern uint64_t zil_max_log_data(zilog_t *zilog); + extern int zil_replay_disable; #ifdef __cplusplus diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h index 174fef334128..d2f4018653a6 100644 --- a/include/sys/zil_impl.h +++ b/include/sys/zil_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -209,6 +209,13 @@ struct zilog { uint_t zl_prev_rotor; /* rotor for zl_prev[] */ txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */ uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */ + /* + * Max block size for this ZIL. Note that this can not be changed + * while the ZIL is in use because consumers (ZPL/zvol) need to take + * this into account when deciding between WR_COPIED and WR_NEED_COPY + * (see zil_max_copied_data()). + */ + uint64_t zl_max_block_size; }; typedef struct zil_bp_node { @@ -216,26 +223,6 @@ typedef struct zil_bp_node { avl_node_t zn_node; } zil_bp_node_t; -/* - * Maximum amount of write data that can be put into single log block. - */ -#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ - sizeof (lr_write_t)) - -/* - * Maximum amount of log space we agree to waste to reduce number of - * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%). - */ -#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8) - -/* - * Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY - * as more space efficient if we can't fit at least two log records into - * maximum sized log block. - */ -#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \ - sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t)) - #ifdef __cplusplus } #endif diff --git a/include/sys/zio.h b/include/sys/zio.h index e69bf9208039..aa58fe1fafd6 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -278,6 +278,7 @@ enum zio_wait_type { typedef void zio_done_func_t(zio_t *zio); +extern int zio_exclude_metadata; extern int zio_dva_throttle_enabled; extern const char *zio_type_name[ZIO_TYPES]; diff --git a/include/sys/zio_compress.h b/include/sys/zio_compress.h index 1642823d3d42..208117eee4b5 100644 --- a/include/sys/zio_compress.h +++ b/include/sys/zio_compress.h @@ -105,8 +105,7 @@ extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len, int level); extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len, int level); -extern int lz4_decompress_abd(abd_t *src, void *dst, size_t s_len, size_t d_len, - int level); + /* * Compress and decompress data if necessary. 
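In the zil.h and zil_impl.h hunks above, the fixed ZIL_MAX_LOG_DATA and ZIL_MAX_COPIED_DATA macros, hard-wired to SPA_OLD_MAXBLOCKSIZE, give way to the per-zilog functions zil_max_log_data() and zil_max_copied_data(), so the thresholds can track the new zl_max_block_size field. A simplified sketch of the consumer-side decision that the zl_max_block_size comment describes; pick_wr_state() is hypothetical, and the real zfs_log_write() also weighs WR_INDIRECT for large writes:

	/* Embed small records in the log block; defer larger ones. */
	static itx_wr_state_t
	pick_wr_state(zilog_t *zilog, uint64_t resid)
	{
		if (resid <= zil_max_copied_data(zilog))
			return (WR_COPIED);	/* data copied into the record now */
		return (WR_NEED_COPY);	/* data fetched when the itx commits */
	}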
*/ diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h index d54e2fe192fa..a029127914b2 100644 --- a/include/sys/zio_crypt.h +++ b/include/sys/zio_crypt.h @@ -55,7 +55,7 @@ typedef struct zio_crypt_info { /* length of the encryption key */ size_t ci_keylen; - /* human-readable name of the encryption alforithm */ + /* human-readable name of the encryption algorithm */ char *ci_name; } zio_crypt_info_t; @@ -78,7 +78,7 @@ typedef struct zio_crypt_key { /* buffer for hmac key */ uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN]; - /* buffer for currrent encryption key derived from master key */ + /* buffer for current encryption key derived from master key */ uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN]; /* current 64 bit salt for deriving an encryption key */ @@ -99,7 +99,7 @@ typedef struct zio_crypt_key { /* template of hmac key for illumos crypto api */ crypto_ctx_template_t zk_hmac_tmpl; - /* lock for changing the salt and dependant values */ + /* lock for changing the salt and dependent values */ krwlock_t zk_salt_lock; } zio_crypt_key_t; diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index fbbe06eb04f8..8ca12463176d 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -87,7 +87,7 @@ extern "C" { * * NOP Write: * The NOP write feature is performed by the ZIO_STAGE_NOP_WRITE stage - * and is added to an existing write pipeline if a crypographically + * and is added to an existing write pipeline if a cryptographically * secure checksum (i.e. SHA256) is enabled and compression is turned on. * The NOP write stage will compare the checksums of the current data * on-disk (level-0 blocks only) and the data that is currently being written. diff --git a/include/zfs_comutil.h b/include/zfs_comutil.h index 1360d6e1c171..7cdc6d6938ae 100644 --- a/include/zfs_comutil.h +++ b/include/zfs_comutil.h @@ -34,6 +34,7 @@ extern "C" { #endif extern boolean_t zfs_allocatable_devs(nvlist_t *); +extern boolean_t zfs_special_devs(nvlist_t *); extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *); extern int zfs_zpl_version_map(int spa_version); diff --git a/include/zfs_namecheck.h b/include/zfs_namecheck.h index 527db92b0cfa..56d3d36f026e 100644 --- a/include/zfs_namecheck.h +++ b/include/zfs_namecheck.h @@ -43,6 +43,8 @@ typedef enum { NAME_ERR_RESERVED, /* entire name is reserved */ NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ NAME_ERR_TOOLONG, /* name is too long */ + NAME_ERR_SELF_REF, /* reserved self path name ('.') */ + NAME_ERR_PARENT_REF, /* reserved parent path name ('..') */ NAME_ERR_NO_AT, /* permission set is missing '@' */ } namecheck_err_t; diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 1d8f631c837d..d880011c7a52 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -42,6 +42,7 @@ #include #include #include +#include #include static struct uuid_to_ptag { @@ -223,7 +224,7 @@ efi_get_info(int fd, struct dk_cinfo *dki_info) /* * The simplest way to get the partition number under linux is - * to parse it out of the /dev/ block device name. + * to parse it out of the /dev/ block device name. * The kernel creates this using the partition number when it * populates /dev/ so it may be trusted. The tricky bit here is * that the naming convention is based on the block device type. 
@@ -398,10 +399,11 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) length = sizeof (struct dk_gpt) + sizeof (struct dk_part) * (nparts - 1); - if ((*vtoc = calloc(1, length)) == NULL) + vptr = calloc(1, length); + if (vptr == NULL) return (-1); - vptr = *vtoc; + *vtoc = vptr; vptr->efi_version = EFI_VERSION_CURRENT; vptr->efi_lbasize = lbsize; @@ -430,30 +432,32 @@ efi_alloc_and_read(int fd, struct dk_gpt **vtoc) int rval; uint32_t nparts; int length; + struct dk_gpt *vptr; /* figure out the number of entries that would fit into 16K */ nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t); length = (int) sizeof (struct dk_gpt) + (int) sizeof (struct dk_part) * (nparts - 1); - if ((*vtoc = calloc(1, length)) == NULL) + vptr = calloc(1, length); + + if (vptr == NULL) return (VT_ERROR); - (*vtoc)->efi_nparts = nparts; - rval = efi_read(fd, *vtoc); + vptr->efi_nparts = nparts; + rval = efi_read(fd, vptr); - if ((rval == VT_EINVAL) && (*vtoc)->efi_nparts > nparts) { + if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) { void *tmp; length = (int) sizeof (struct dk_gpt) + - (int) sizeof (struct dk_part) * - ((*vtoc)->efi_nparts - 1); - nparts = (*vtoc)->efi_nparts; - if ((tmp = realloc(*vtoc, length)) == NULL) { - free (*vtoc); + (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1); + nparts = vptr->efi_nparts; + if ((tmp = realloc(vptr, length)) == NULL) { + free(vptr); *vtoc = NULL; return (VT_ERROR); } else { - *vtoc = tmp; - rval = efi_read(fd, *vtoc); + vptr = tmp; + rval = efi_read(fd, vptr); } } @@ -462,8 +466,10 @@ efi_alloc_and_read(int fd, struct dk_gpt **vtoc) (void) fprintf(stderr, "read of EFI table failed, rval=%d\n", rval); } - free (*vtoc); + free(vptr); *vtoc = NULL; + } else { + *vtoc = vptr; } return (rval); @@ -1113,7 +1119,9 @@ efi_use_whole_disk(int fd) int i; uint_t resv_index = 0, data_index = 0; diskaddr_t resv_start = 0, data_start = 0; - diskaddr_t difference; + diskaddr_t data_size, limit, difference; + boolean_t sync_needed = B_FALSE; + uint_t nblocks; rval = efi_alloc_and_read(fd, &efi_label); if (rval < 0) { @@ -1122,13 +1130,67 @@ efi_use_whole_disk(int fd) return (rval); } + /* + * Find the last physically non-zero partition. + * This should be the reserved partition. + */ + for (i = 0; i < efi_label->efi_nparts; i ++) { + if (resv_start < efi_label->efi_parts[i].p_start) { + resv_start = efi_label->efi_parts[i].p_start; + resv_index = i; + } + } + + /* + * Find the last physically non-zero partition before that. + * This is the data partition. + */ + for (i = 0; i < resv_index; i ++) { + if (data_start < efi_label->efi_parts[i].p_start) { + data_start = efi_label->efi_parts[i].p_start; + data_index = i; + } + } + data_size = efi_label->efi_parts[data_index].p_size; + + /* + * See the "efi_alloc_and_init" function for more information + * about where this "nblocks" value comes from. + */ + nblocks = efi_label->efi_first_u_lba - 1; + + /* + * Determine if the EFI label is out of sync. We check that: + * + * 1. the data partition ends at the limit we set, and + * 2. the reserved partition starts at the limit we set. + * + * If either of these conditions is not met, then we need to + * resync the EFI label. + * + * The limit is the last usable LBA, determined by the last LBA + * and the first usable LBA fields on the EFI label of the disk + * (see the lines directly above). 
Additionally, we factor in + * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and + * P2ALIGN it to ensure the partition boundaries are aligned + * (for performance reasons). The alignment should match the + * alignment used by the "zpool_label_disk" function. + */ + limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE, + PARTITION_END_ALIGNMENT); + if (data_start + data_size != limit || resv_start != limit) + sync_needed = B_TRUE; + + if (efi_debug && sync_needed) + (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n"); + /* * If alter_lba is 1, we are using the backup label. * Since we can locate the backup label by disk capacity, * there must be no unallocated space. */ if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba - >= efi_label->efi_last_lba)) { + >= efi_label->efi_last_lba && !sync_needed)) { if (efi_debug) { (void) fprintf(stderr, "efi_use_whole_disk: requested space not found\n"); @@ -1137,24 +1199,11 @@ efi_use_whole_disk(int fd) return (VT_ENOSPC); } - difference = efi_label->efi_last_lba - efi_label->efi_altern_lba; - - /* - * Find the last physically non-zero partition. - * This should be the reserved partition. - */ - for (i = 0; i < efi_label->efi_nparts; i ++) { - if (resv_start < efi_label->efi_parts[i].p_start) { - resv_start = efi_label->efi_parts[i].p_start; - resv_index = i; - } - } - /* * Verify that we've found the reserved partition by checking * that it looks the way it did when we created it in zpool_label_disk. * If we've found the incorrect partition, then we know that this - * device was reformatted and no longer is soley used by ZFS. + * device was reformatted and no longer is solely used by ZFS. */ if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || @@ -1167,17 +1216,36 @@ efi_use_whole_disk(int fd) return (VT_ENOSPC); } - /* - * Find the last physically non-zero partition before that. - * This is the data partition. - */ - for (i = 0; i < resv_index; i ++) { - if (data_start < efi_label->efi_parts[i].p_start) { - data_start = efi_label->efi_parts[i].p_start; - data_index = i; + if (data_start + data_size != resv_start) { + if (efi_debug) { + (void) fprintf(stderr, + "efi_use_whole_disk: " + "data_start (%lli) + " + "data_size (%lli) != " + "resv_start (%lli)\n", + data_start, data_size, resv_start); } + + return (VT_EINVAL); } + if (limit < resv_start) { + if (efi_debug) { + (void) fprintf(stderr, + "efi_use_whole_disk: " + "limit (%lli) < resv_start (%lli)\n", + limit, resv_start); + } + + return (VT_EINVAL); + } + + difference = limit - resv_start; + + if (efi_debug) + (void) fprintf(stderr, + "efi_use_whole_disk: difference is %lli\n", difference); + /* * Move the reserved partition. 
There is currently no data in * here except fabricated devids (which get generated via @@ -1185,7 +1253,7 @@ efi_use_whole_disk(int fd) */ efi_label->efi_parts[data_index].p_size += difference; efi_label->efi_parts[resv_index].p_start += difference; - efi_label->efi_last_u_lba += difference; + efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks; rval = efi_write(fd, efi_label); if (rval < 0) { @@ -1202,7 +1270,6 @@ efi_use_whole_disk(int fd) return (0); } - /* * write EFI label and backup label */ @@ -1222,7 +1289,7 @@ efi_write(int fd, struct dk_gpt *vtoc) if ((rval = efi_get_info(fd, &dki_info)) != 0) return (rval); - /* check if we are dealing wih a metadevice */ + /* check if we are dealing with a metadevice */ if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && (strncmp(dki_info.dki_dname, "md", 3) == 0)) { md_flag = 1; diff --git a/lib/libnvpair/libnvpair_json.c b/lib/libnvpair/libnvpair_json.c index 0b403f1af356..37a392391fb0 100644 --- a/lib/libnvpair/libnvpair_json.c +++ b/lib/libnvpair/libnvpair_json.c @@ -303,7 +303,7 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) for (i = 0; i < valsz; i++) { if (i > 0) FPRINTF(fp, ","); - FPRINTF(fp, "%hd", val[i]); + FPRINTF(fp, "%hhd", val[i]); } FPRINTF(fp, "]"); break; diff --git a/lib/libshare/nfs.c b/lib/libshare/nfs.c index 5c8976e15aa6..7cc5ae43f500 100644 --- a/lib/libshare/nfs.c +++ b/lib/libshare/nfs.c @@ -387,9 +387,10 @@ get_linux_shareopts(const char *shareopts, char **plinux_opts) *plinux_opts = NULL; - /* default options for Solaris shares */ + /* no_subtree_check - Default as of nfs-utils v1.1.0 */ (void) add_linux_shareopt(plinux_opts, "no_subtree_check", NULL); - (void) add_linux_shareopt(plinux_opts, "no_root_squash", NULL); + + /* mountpoint - Restrict exports to ZFS mountpoints */ (void) add_linux_shareopt(plinux_opts, "mountpoint", NULL); rc = foreach_nfs_shareopt(shareopts, get_linux_shareopts_cb, diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c index 4c2045dfdb4d..a95607ee0324 100644 --- a/lib/libshare/smb.c +++ b/lib/libshare/smb.c @@ -29,7 +29,7 @@ * * TESTING * Make sure that samba listens to 'localhost' (127.0.0.1) and that the options - * 'usershare max shares' and 'usershare owner only' have been rewied/set + * 'usershare max shares' and 'usershare owner only' have been reviewed/set * accordingly (see zfs(8) for information). * * Once configuration in samba have been done, test that this diff --git a/lib/libspl/asm-generic/atomic.c b/lib/libspl/asm-generic/atomic.c index d0023b182813..03f8ddcfa8f9 100644 --- a/lib/libspl/asm-generic/atomic.c +++ b/lib/libspl/asm-generic/atomic.c @@ -37,7 +37,7 @@ pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER; /* - * Theses are the void returning variants + * These are the void returning variants */ /* BEGIN CSTYLED */ #define ATOMIC_INC(name, type) \ diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index f615fbdfe7c9..820519c00ae6 100644 --- a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -33,11 +33,18 @@ #include #include +#ifndef _KERNEL +extern int aok; +#endif + static inline int libspl_assert(const char *buf, const char *file, const char *func, int line) { fprintf(stderr, "%s\n", buf); fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func); + if (aok) { + return (0); + } abort(); } @@ -52,6 +59,9 @@ libspl_assertf(const char *file, const char *func, int line, char *format, ...) 
fprintf(stderr, "\n"); fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func); va_end(args); + if (aok) { + return; + } abort(); } diff --git a/lib/libspl/include/atomic.h b/lib/libspl/include/atomic.h index 7072a11bdb16..f8c257f9696b 100644 --- a/lib/libspl/include/atomic.h +++ b/lib/libspl/include/atomic.h @@ -79,7 +79,7 @@ extern void atomic_add_64(volatile uint64_t *, int64_t); #endif /* - * Substract delta from target + * Subtract delta from target */ extern void atomic_sub_8(volatile uint8_t *, int8_t); extern void atomic_sub_char(volatile uchar_t *, signed char); @@ -173,7 +173,7 @@ extern uint64_t atomic_add_64_nv(volatile uint64_t *, int64_t); #endif /* - * Substract delta from target + * Subtract delta from target */ extern uint8_t atomic_sub_8_nv(volatile uint8_t *, int8_t); extern uchar_t atomic_sub_char_nv(volatile uchar_t *, signed char); diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h index 9bd0d949d542..69fb6d401fc7 100644 --- a/lib/libspl/include/sys/kstat.h +++ b/lib/libspl/include/sys/kstat.h @@ -82,7 +82,7 @@ typedef struct kstat { void *ks_data; /* kstat type-specific data */ uint_t ks_ndata; /* # of type-specific data records */ size_t ks_data_size; /* total size of kstat data section */ - hrtime_t ks_snaptime; /* time of last data shapshot */ + hrtime_t ks_snaptime; /* time of last data snapshot */ /* * Fields relevant to kernel only */ diff --git a/lib/libspl/include/sys/param.h b/lib/libspl/include/sys/param.h index c22d508f9b07..26335187fdca 100644 --- a/lib/libspl/include/sys/param.h +++ b/lib/libspl/include/sys/param.h @@ -37,7 +37,7 @@ * with smaller units (fragments) only in the last direct block. * MAXBSIZE primarily determines the size of buffers in the buffer * pool. It may be made larger without any effect on existing - * file systems; however making it smaller make make some file + * file systems; however making it smaller may make some file * systems unmountable. * * Note that the blocked devices are assumed to have DEV_BSIZE diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h index 97e8412ef70a..91ee3b3fd00d 100644 --- a/lib/libspl/include/sys/uio.h +++ b/lib/libspl/include/sys/uio.h @@ -75,7 +75,7 @@ typedef enum xuio_type { typedef struct uioa_page_s { /* locked uio_iov state */ int uioa_pfncnt; /* count of pfn_t(s) in *uioa_ppp */ - void **uioa_ppp; /* page_t or pfn_t arrary */ + void **uioa_ppp; /* page_t or pfn_t array */ caddr_t uioa_base; /* address base */ size_t uioa_len; /* span length */ } uioa_page_t; diff --git a/lib/libspl/include/sys/vtoc.h b/lib/libspl/include/sys/vtoc.h index 22a652b74bf1..5d8448b628dc 100644 --- a/lib/libspl/include/sys/vtoc.h +++ b/lib/libspl/include/sys/vtoc.h @@ -51,7 +51,7 @@ extern "C" { * v_sanity returned as VTOC_SANE * if Disk Label was sane * v_sectorsz returned as 512 - * v_reserved [all] retunred as zero + * v_reserved [all] returned as zero * timestamp [all] returned as zero * * See dklabel.h, read_vtoc(), and write_vtoc(). diff --git a/lib/libspl/mkdirp.c b/lib/libspl/mkdirp.c index 54174175200e..fce2c1c82eb7 100644 --- a/lib/libspl/mkdirp.c +++ b/lib/libspl/mkdirp.c @@ -128,7 +128,7 @@ mkdirp(const char *d, mode_t mode) * caller, or NULL is returned on error. * * The caller should handle error reporting based upon the - * returned vlaue, and should free the returned value, + * returned value, and should free the returned value, * when appropriate. 
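The assert.h hunk above gates abort() on the new aok flag, so a userspace consumer can downgrade fatal ASSERT failures to printed warnings. A hypothetical harness wiring the flag (defined in lib/libspl/zone.c in the next hunk, default 0) to an environment variable; ZFS_ASSERT_OK is invented here for illustration and is not an existing knob:

	#include <stdlib.h>

	extern int aok;	/* libspl; nonzero makes libspl_assert() return */

	int
	main(void)
	{
		if (getenv("ZFS_ASSERT_OK") != NULL)
			aok = 1;	/* failed ASSERTs now log and continue */
		/* run the actual test workload here */
		return (0);
	}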
*/ diff --git a/lib/libspl/zone.c b/lib/libspl/zone.c index 5ca93b224d9e..4a0e600ca3b9 100644 --- a/lib/libspl/zone.c +++ b/lib/libspl/zone.c @@ -27,6 +27,8 @@ #include #include +int aok = 0; + zoneid_t getzoneid() { diff --git a/lib/libtpool/thread_pool.c b/lib/libtpool/thread_pool.c index a43fdd9cd608..267fa834bd72 100644 --- a/lib/libtpool/thread_pool.c +++ b/lib/libtpool/thread_pool.c @@ -134,7 +134,7 @@ tpool_worker(void *arg) /* * This is the worker's main loop. - * It will only be left if a timeout or an error has occured. + * It will only be left if a timeout or an error has occurred. */ active.tpa_tid = pthread_self(); for (;;) { diff --git a/lib/libzfs/THIRDPARTYLICENSE.openssl b/lib/libzfs/THIRDPARTYLICENSE.openssl index a2c4adcbe6a5..92c9e196a318 100644 --- a/lib/libzfs/THIRDPARTYLICENSE.openssl +++ b/lib/libzfs/THIRDPARTYLICENSE.openssl @@ -101,7 +101,7 @@ * must display the following acknowledgement: * "This product includes cryptographic software written by * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library + * The word 'cryptographic' can be left out if the routines from the library * being used are not cryptographic related :-). * 4. If you include any Windows specific code (or a derivative thereof) from * the apps directory (application code) you must include an acknowledgement: diff --git a/lib/libzfs/libzfs.pc.in b/lib/libzfs/libzfs.pc.in index 0e83f7a64be0..1122401a6eb9 100644 --- a/lib/libzfs/libzfs.pc.in +++ b/lib/libzfs/libzfs.pc.in @@ -9,4 +9,4 @@ Version: @VERSION@ URL: http://zfsonlinux.org Requires: libzfs_core Cflags: -I${includedir}/libzfs -I${includedir}/libspl -Libs: -L${libdir} -lzfs +Libs: -L${libdir} -lzfs -lnvpair diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 3101febc1605..72f641056edc 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -475,9 +475,10 @@ change_one(zfs_handle_t *zhp, void *data) prop_changelist_t *clp = data; char property[ZFS_MAXPROPLEN]; char where[64]; - prop_changenode_t *cn; + prop_changenode_t *cn = NULL; zprop_source_t sourcetype = ZPROP_SRC_NONE; zprop_source_t share_sourcetype = ZPROP_SRC_NONE; + int ret = 0; /* * We only want to unmount/unshare those filesystems that may inherit @@ -493,8 +494,7 @@ change_one(zfs_handle_t *zhp, void *data) zfs_prop_get(zhp, clp->cl_prop, property, sizeof (property), &sourcetype, where, sizeof (where), B_FALSE) != 0) { - zfs_close(zhp); - return (0); + goto out; } /* @@ -506,8 +506,7 @@ change_one(zfs_handle_t *zhp, void *data) zfs_prop_get(zhp, clp->cl_shareprop, property, sizeof (property), &share_sourcetype, where, sizeof (where), B_FALSE) != 0) { - zfs_close(zhp); - return (0); + goto out; } if (clp->cl_alldependents || clp->cl_allchildren || @@ -518,8 +517,8 @@ change_one(zfs_handle_t *zhp, void *data) share_sourcetype == ZPROP_SRC_INHERITED))) { if ((cn = zfs_alloc(zfs_get_handle(zhp), sizeof (prop_changenode_t))) == NULL) { - zfs_close(zhp); - return (-1); + ret = -1; + goto out; } cn->cn_handle = zhp; @@ -541,16 +540,23 @@ change_one(zfs_handle_t *zhp, void *data) uu_avl_insert(clp->cl_tree, cn, idx); } else { free(cn); - zfs_close(zhp); + cn = NULL; } if (!clp->cl_alldependents) - return (zfs_iter_children(zhp, change_one, data)); - } else { - zfs_close(zhp); + ret = zfs_iter_children(zhp, change_one, data); + + /* + * If we added the handle to the changelist, we will re-use it + * later so return without closing it. 
+ */ + if (cn != NULL) + return (ret); } - return (0); +out: + zfs_close(zhp); + return (ret); } static int diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c index 3318a6bd2e11..b7b567ef53c5 100644 --- a/lib/libzfs/libzfs_crypto.c +++ b/lib/libzfs/libzfs_crypto.c @@ -242,7 +242,7 @@ get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat, out: if (isatty(fileno(fd))) { - /* reset the teminal */ + /* reset the terminal */ (void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term); (void) sigaction(SIGINT, &osigint, NULL); (void) sigaction(SIGTSTP, &osigtstp, NULL); @@ -740,14 +740,6 @@ zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props, pcrypt = ZIO_CRYPT_OFF; } - /* Check for encryption being explicitly truned off */ - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) { - ret = EINVAL; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Invalid encryption value. Dataset must be encrypted.")); - goto out; - } - /* Get the inherited encryption property if we don't have it locally */ if (!local_crypt) crypt = pcrypt; @@ -849,10 +841,7 @@ int zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, char *parent_name, nvlist_t *props) { - int ret; char errbuf[1024]; - zfs_handle_t *pzhp = NULL; - uint64_t pcrypt, ocrypt; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "Encryption clone error")); @@ -865,40 +854,12 @@ zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)) || nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)) || nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS))) { - ret = EINVAL; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Encryption properties must inherit from origin dataset.")); - goto out; - } - - /* get a reference to parent dataset, should never be NULL */ - pzhp = make_dataset_handle(hdl, parent_name); - if (pzhp == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Failed to lookup parent.")); - return (ENOENT); + return (EINVAL); } - /* Lookup parent's crypt */ - pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); - ocrypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION); - - /* all children of encrypted parents must be encrypted */ - if (pcrypt != ZIO_CRYPT_OFF && ocrypt == ZIO_CRYPT_OFF) { - ret = EINVAL; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Cannot create unencrypted clone as a child " - "of encrypted parent.")); - goto out; - } - - zfs_close(pzhp); return (0); - -out: - if (pzhp != NULL) - zfs_close(pzhp); - return (ret); } typedef struct loadkeys_cbdata { @@ -1360,7 +1321,7 @@ zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey) if (is_encroot) { /* - * If this is already an ecryption root, just keep + * If this is already an encryption root, just keep * any properties not set by the user. */ if (keyformat == ZFS_KEYFORMAT_NONE) { diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index de94021a6758..4a07c8d20bf5 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . @@ -31,6 +31,7 @@ * Copyright 2016 Igor Kozhukhov * Copyright 2017-2018 RackTop Systems. 
* Copyright (c) 2019 Datto Inc. + * Copyright (c) 2019, loli10K */ #include @@ -118,8 +119,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '@' delimiter in snapshot name, " - "did you mean to use -r?")); + "missing '@' delimiter in snapshot name")); return (0); } @@ -133,8 +133,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, if (type == ZFS_TYPE_BOOKMARK && strchr(path, '#') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '#' delimiter in bookmark name, " - "did you mean to use -r?")); + "missing '#' delimiter in bookmark name")); return (0); } @@ -196,6 +195,16 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, "reserved disk name")); break; + case NAME_ERR_SELF_REF: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "self reference, '.' is found in name")); + break; + + case NAME_ERR_PARENT_REF: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent reference, '..' is found in name")); + break; + default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); @@ -1221,12 +1230,19 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } case ZFS_PROP_SPECIAL_SMALL_BLOCKS: + { + int maxbs = SPA_OLD_MAXBLOCKSIZE; + char buf[64]; + if (zpool_hdl != NULL) { char state[64] = ""; + maxbs = zpool_get_prop_int(zpool_hdl, + ZPOOL_PROP_MAXBLOCKSIZE, NULL); + /* * Issue a warning but do not fail so that - * tests for setable properties succeed. + * tests for settable properties succeed. */ if (zpool_prop_get_feature(zpool_hdl, "feature@allocation_classes", state, @@ -1239,15 +1255,17 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } if (intval != 0 && (intval < SPA_MINBLOCKSIZE || - intval > SPA_OLD_MAXBLOCKSIZE || !ISP2(intval))) { + intval > maxbs || !ISP2(intval))) { + zfs_nicebytes(maxbs, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid '%s=%d' property: must be zero or " - "a power of 2 from 512B to 128K"), propname, - intval); + "a power of 2 from 512B to %s"), propname, + intval, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; + } case ZFS_PROP_MLSLABEL: { @@ -1441,7 +1459,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, * There was an error in parsing so * deal with it by issuing an error * message and leaving after - * uninitializing the the libshare + * uninitializing the libshare * interface. 
*/ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -1607,6 +1625,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) uint64_t new_reservation; zfs_prop_t resv_prop; nvlist_t *props; + zpool_handle_t *zph = zpool_handle(zhp); /* * If this is an existing volume, and someone is setting the volsize, @@ -1621,7 +1640,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); - if ((zvol_volsize_to_reservation(old_volsize, props) != + if ((zvol_volsize_to_reservation(zph, old_volsize, props) != old_reservation) || nvlist_exists(nvl, zfs_prop_to_name(resv_prop))) { fnvlist_free(props); @@ -1632,7 +1651,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) fnvlist_free(props); return (-1); } - new_reservation = zvol_volsize_to_reservation(new_volsize, props); + new_reservation = zvol_volsize_to_reservation(zph, new_volsize, props); fnvlist_free(props); if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), @@ -1645,7 +1664,7 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) /* * Helper for 'zfs {set|clone} refreservation=auto'. Must be called after - * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinal value. + * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinel value. * Return codes must match zfs_add_synthetic_resv(). */ static int @@ -1687,7 +1706,8 @@ zfs_fix_auto_resv(zfs_handle_t *zhp, nvlist_t *nvl) volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); } - resvsize = zvol_volsize_to_reservation(volsize, props); + resvsize = zvol_volsize_to_reservation(zpool_handle(zhp), volsize, + props); fnvlist_free(props); (void) nvlist_remove_all(nvl, zfs_prop_to_name(prop)); @@ -2969,8 +2989,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, case ZFS_PROP_GUID: case ZFS_PROP_CREATETXG: + case ZFS_PROP_OBJSETID: /* - * GUIDs are stored as numbers, but they are identifiers. + * These properties are stored as numbers, but they are + * identifiers. * We don't want them to be pretty printed, because pretty * printing mangles the ID into a truncated and useless value. */ @@ -4104,6 +4126,16 @@ zfs_promote(zfs_handle_t *zhp) if (ret != 0) { switch (ret) { + case EACCES: + /* + * Promoting encrypted dataset outside its + * encryption root. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot promote dataset outside its " + "encryption root")); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + case EEXIST: /* There is a conflicting snapshot name. 
*/ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -4467,8 +4499,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, zfs_cmd_t zc = {"\0"}; char *delim; prop_changelist_t *cl = NULL; - zfs_handle_t *zhrp = NULL; - char *parentname = NULL; char parent[ZFS_MAX_DATASET_NAME_LEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4563,7 +4593,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } if (recursive) { - parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); + zfs_handle_t *zhrp; + char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { ret = -1; goto error; @@ -4571,10 +4602,12 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, delim = strchr(parentname, '@'); *delim = '\0'; zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); + free(parentname); if (zhrp == NULL) { ret = -1; goto error; } + zfs_close(zhrp); } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, CL_GATHER_ITER_MOUNTED, @@ -4618,16 +4651,9 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else if (errno == EACCES) { - if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == - ZIO_CRYPT_OFF) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot rename an unencrypted dataset to " - "be a decendent of an encrypted one")); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot move encryption child outside of " - "its encryption root")); - } + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot move encrypted child outside of " + "its encryption root")); (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); @@ -4647,12 +4673,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } error: - if (parentname != NULL) { - free(parentname); - } - if (zhrp != NULL) { - zfs_close(zhrp); - } if (cl != NULL) { changelist_free(cl); } @@ -5352,12 +5372,176 @@ zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) } /* - * Convert the zvol's volume size to an appropriate reservation. + * The theory of raidz space accounting + * + * The "referenced" property of RAIDZ vdevs is scaled such that a 128KB block + * will "reference" 128KB, even though it allocates more than that, to store the + * parity information (and perhaps skip sectors). This concept of the + * "referenced" (and other DMU space accounting) being lower than the allocated + * space by a constant factor is called "raidz deflation." + * + * As mentioned above, the constant factor for raidz deflation assumes a 128KB + * block size. However, zvols typically have a much smaller block size (default + * 8KB). These smaller blocks may require proportionally much more parity + * information (and perhaps skip sectors). In this case, the change to the + * "referenced" property may be much more than the logical block size. + * + * Suppose a raidz vdev has 5 disks with ashift=12. A 128k block may be written + * as follows. 
+ * + * +-------+-------+-------+-------+-------+ + * | disk1 | disk2 | disk3 | disk4 | disk5 | + * +-------+-------+-------+-------+-------+ + * | P0 | D0 | D8 | D16 | D24 | + * | P1 | D1 | D9 | D17 | D25 | + * | P2 | D2 | D10 | D18 | D26 | + * | P3 | D3 | D11 | D19 | D27 | + * | P4 | D4 | D12 | D20 | D28 | + * | P5 | D5 | D13 | D21 | D29 | + * | P6 | D6 | D14 | D22 | D30 | + * | P7 | D7 | D15 | D23 | D31 | + * +-------+-------+-------+-------+-------+ + * + * Above, notice that 160k was allocated: 8 x 4k parity sectors + 32 x 4k data + * sectors. The dataset's referenced will increase by 128k and the pool's + * allocated and free properties will be adjusted by 160k. + * + * A 4k block written to the same raidz vdev will require two 4k sectors. The + * blank cells represent unallocated space. + * + * +-------+-------+-------+-------+-------+ + * | disk1 | disk2 | disk3 | disk4 | disk5 | + * +-------+-------+-------+-------+-------+ + * | P0 | D0 | | | | + * +-------+-------+-------+-------+-------+ + * + * Above, notice that the 4k block required one sector for parity and another + * for data. vdev_raidz_asize() will return 8k and as such the pool's allocated + * and free properties will be adjusted by 8k. The dataset will not be charged + * 8k. Rather, it will be charged a value that is scaled according to the + * overhead of the 128k block on the same vdev. This 8k allocation will be + * charged 8k * 128k / 160k. 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as + * calculated in the 128k block example above. + * + * Every raidz allocation is sized to be a multiple of nparity+1 sectors. That + * is, every raidz1 allocation will be a multiple of 2 sectors, raidz2 + * allocations are a multiple of 3 sectors, and raidz3 allocations are a + * multiple of 4 sectors. When a block does not fill the required number of + * sectors, skip blocks (sectors) are used. + * + * An 8k block being written to a raidz vdev may be written as follows: + * + * +-------+-------+-------+-------+-------+ + * | disk1 | disk2 | disk3 | disk4 | disk5 | + * +-------+-------+-------+-------+-------+ + * | P0 | D0 | D1 | S0 | | + * +-------+-------+-------+-------+-------+ + * + * In order to maintain the nparity+1 allocation size, a skip block (S0) was + * added. For this 8k block, the pool's allocated and free properties are + * adjusted by 16k and the dataset's referenced is increased by 16k * 128k / + * 160k. Again, 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as calculated in + * the 128k block example above. + * + * Compression may lead to a variety of block sizes being written for the same + * volume or file. There is no clear way to reserve just the amount of space + * that will be required, so the worst case (no compression) is assumed. + * Note that metadata blocks will typically be compressed, so the reservation + * size returned by zvol_volsize_to_reservation() will generally be slightly + * larger than the maximum that the volume can reference. + */ + +/* + * Derived from function of same name in module/zfs/vdev_raidz.c. Returns the + * amount of space (in bytes) that will be allocated for the specified block + * size. Note that the "referenced" space accounted will be less than this, but + * not necessarily equal to "blksize", due to RAIDZ deflation.
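As a cross-check of the theory comment above, the standalone sketch below, which is not part of the change, restates the allocation arithmetic of vdev_raidz_asize() (defined next) and confirms that it reproduces the comment's 5-disk raidz1, ashift=12 figures:

	#include <assert.h>
	#include <stdint.h>

	static uint64_t
	raidz_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift,
	    uint64_t blksize)
	{
		uint64_t ndata = ndisks - nparity;
		uint64_t a = ((blksize - 1) >> ashift) + 1;	/* data sectors */
		a += nparity * ((a + ndata - 1) / ndata);	/* parity sectors */
		a = ((a + nparity) / (nparity + 1)) * (nparity + 1); /* pad rows */
		return (a << ashift);
	}

	int
	main(void)
	{
		assert(raidz_asize(5, 1, 12, 131072) == 163840); /* 128k -> 160k */
		assert(raidz_asize(5, 1, 12, 8192) == 16384);	/* P0 D0 D1 S0 */
		assert(raidz_asize(5, 1, 12, 4096) == 8192);	/* P0 D0 */
		return (0);
	}

The 8k case is then charged 16k * 128k / 160k = 12.8k of "referenced", per the deflation scaling described above.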
+ */ +static uint64_t +vdev_raidz_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift, + uint64_t blksize) +{ + uint64_t asize, ndata; + + ASSERT3U(ndisks, >, nparity); + ndata = ndisks - nparity; + asize = ((blksize - 1) >> ashift) + 1; + asize += nparity * ((asize + ndata - 1) / ndata); + asize = roundup(asize, nparity + 1) << ashift; + + return (asize); +} + +/* + * Determine how much space will be allocated if it lands on the most space- + * inefficient top-level vdev. Returns the size in bytes required to store one + * copy of the volume data. See theory comment above. + */ +static uint64_t +volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize) +{ + nvlist_t *config, *tree, **vdevs; + uint_t nvdevs, v; + uint64_t ret = 0; + + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || + nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, + &vdevs, &nvdevs) != 0) { + return (nblocks * blksize); + } + + for (v = 0; v < nvdevs; v++) { + char *type; + uint64_t nparity, ashift, asize, tsize; + nvlist_t **disks; + uint_t ndisks; + uint64_t volsize; + + if (nvlist_lookup_string(vdevs[v], ZPOOL_CONFIG_TYPE, + &type) != 0 || strcmp(type, VDEV_TYPE_RAIDZ) != 0 || + nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_NPARITY, + &nparity) != 0 || + nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_ASHIFT, + &ashift) != 0 || + nvlist_lookup_nvlist_array(vdevs[v], ZPOOL_CONFIG_CHILDREN, + &disks, &ndisks) != 0) { + continue; + } + + /* allocation size for the "typical" 128k block */ + tsize = vdev_raidz_asize(ndisks, nparity, ashift, + SPA_OLD_MAXBLOCKSIZE); + /* allocation size for the blksize block */ + asize = vdev_raidz_asize(ndisks, nparity, ashift, blksize); + + /* + * Scale this size down as a ratio of 128k / tsize. See theory + * statement above. + */ + volsize = nblocks * asize * SPA_OLD_MAXBLOCKSIZE / tsize; + if (volsize > ret) { + ret = volsize; + } + } + + if (ret == 0) { + ret = nblocks * blksize; + } + + return (ret); +} + +/* + * Convert the zvol's volume size to an appropriate reservation. See theory + * comment above. + * * Note: If this routine is updated, it is necessary to update the ZFS test - * suite's shell version in reservation.kshlib. + * suite's shell version in reservation.shlib. */ uint64_t -zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) +zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize, + nvlist_t *props) { uint64_t numdb; uint64_t nblocks, volblocksize; @@ -5373,7 +5557,14 @@ zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = ZVOL_DEFAULT_BLOCKSIZE; - nblocks = volsize/volblocksize; + + nblocks = volsize / volblocksize; + /* + * Metadata defaults to using 128k blocks, not volblocksize blocks. For + * this reason, only the data blocks are scaled based on vdev config. + */ + volsize = volsize_from_vdevs(zph, nblocks, volblocksize); + /* start with metadnode L0-L6 */ numdb = 7; /* calculate number of indirects */ diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 649c232aa3e5..d62801cfdaca 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -1302,12 +1302,14 @@ mountpoint_cmp(const void *arga, const void *argb) } /* - * Return true if path2 is a child of path1. + * Return true if path2 is a child of path1 or path2 equals path1 or + * path1 is "/" (path2 is always a child of "/"). 
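For the libzfs_path_contains() change just below: the old predicate accepted only a strict prefix followed by '/', so it rejected path2 equal to path1 and, because "/" is never followed by a second '/', every mountpoint under the root; the prefix test alone is also why "/a" must not match "/ab". A self-contained sketch mirroring the fixed check:

	#include <assert.h>
	#include <string.h>

	/* Mirror of the fixed predicate, restated here for illustration. */
	static int
	path_contains(const char *path1, const char *path2)
	{
		return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 ||
		    (strstr(path2, path1) == path2 &&
		    path2[strlen(path1)] == '/'));
	}

	int
	main(void)
	{
		assert(path_contains("/", "/a"));	/* everything is under root */
		assert(path_contains("/a", "/a"));	/* equality now counts */
		assert(path_contains("/a", "/a/b"));	/* ordinary child */
		assert(!path_contains("/a", "/ab"));	/* prefix != parentage */
		return (0);
	}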
*/ static boolean_t libzfs_path_contains(const char *path1, const char *path2) { - return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'); + return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 || + (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/')); } /* diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 881b15c1bc79..3ef65f2684b4 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -734,8 +734,6 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, break; default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s'(%d) not defined"), propname, prop); break; } } @@ -1531,7 +1529,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) case EOVERFLOW: /* - * This occurrs when one of the devices is below + * This occurs when one of the devices is below * SPA_MINDEVSIZE. Unfortunately, we can't detect which * device was the problem device since there's no * reliable way to determine device size from userland. @@ -4176,7 +4174,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) /* * Sort the resulting bookmarks. This is a little confusing due to the * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last - * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks + * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks * _not_ copied as part of the process. So we point the start of our * array appropriate and decrement the total number of elements. */ @@ -4327,33 +4325,37 @@ get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) * Retrieve the command history of a pool. */ int -zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) +zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off, + boolean_t *eof) { char *buf; int buflen = 128 * 1024; - uint64_t off = 0; nvlist_t **records = NULL; uint_t numrecords = 0; int err, i; + uint64_t start = *off; buf = malloc(buflen); if (buf == NULL) return (ENOMEM); - do { + /* process about 1MB at a time */ + while (*off - start < 1024 * 1024) { uint64_t bytes_read = buflen; uint64_t leftover; - if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0 + if ((err = get_history(zhp, buf, off, &bytes_read)) != 0 break; /* if nothing else was read in, we're at EOF, just return */ - if (!bytes_read) + if (!bytes_read) { + *eof = B_TRUE; break; + } if ((err = zpool_history_unpack(buf, bytes_read, &leftover, &records, &numrecords)) != 0 break; - off -= leftover; + *off -= leftover; if (leftover == bytes_read) { /* * no progress made, because buffer is not big enough @@ -4365,9 +4367,7 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) if (buf == NULL) return (ENOMEM); } - - /* CONSTCOND */ - } while (1); + } free(buf); @@ -4804,7 +4804,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) if (rval) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " "EFI label on '%s' is damaged. 
Ensure\nthis device " - "is not in in use, and is functioning properly: %d"), + "is not in use, and is functioning properly: %d"), path, rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index f69a46430bbe..1875f79e7c35 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -2827,7 +2827,7 @@ recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname, is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); - /* we don't need to do anything for unencrypted filesystems */ + /* we don't need to do anything for unencrypted datasets */ if (crypt == ZIO_CRYPT_OFF) { zfs_close(zhp); continue; @@ -3438,10 +3438,11 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); + uint64_t payload_size; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive:")); + "cannot receive")); /* XXX would be great to use lseek if possible... */ drr = buf; @@ -3468,9 +3469,14 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) drr->drr_u.drr_object.drr_bonuslen = BSWAP_32(drr->drr_u.drr_object. drr_bonuslen); + drr->drr_u.drr_object.drr_raw_bonuslen = + BSWAP_32(drr->drr_u.drr_object. + drr_raw_bonuslen); } - (void) recv_read(hdl, fd, buf, - P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), + + payload_size = + DRR_OBJECT_PAYLOAD_SIZE(&drr->drr_u.drr_object); + (void) recv_read(hdl, fd, buf, payload_size, B_FALSE, NULL); break; @@ -3483,7 +3489,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) BSWAP_64( drr->drr_u.drr_write.drr_compressed_size); } - uint64_t payload_size = + payload_size = DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write); (void) recv_read(hdl, fd, buf, payload_size, B_FALSE, NULL); @@ -3492,9 +3498,15 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) if (byteswap) { drr->drr_u.drr_spill.drr_length = BSWAP_64(drr->drr_u.drr_spill.drr_length); + drr->drr_u.drr_spill.drr_compressed_size = + BSWAP_64(drr->drr_u.drr_spill. 
+ drr_compressed_size); } - (void) recv_read(hdl, fd, buf, - drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); + + payload_size = + DRR_SPILL_PAYLOAD_SIZE(&drr->drr_u.drr_spill); + (void) recv_read(hdl, fd, buf, payload_size, + B_FALSE, NULL); break; case DRR_WRITE_EMBEDDED: if (byteswap) { @@ -3506,6 +3518,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, 8), B_FALSE, NULL); break; + case DRR_OBJECT_RANGE: case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: @@ -3647,11 +3660,21 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, /* raw streams can't override encryption properties */ if ((zfs_prop_encryption_key_param(prop) || - prop == ZFS_PROP_ENCRYPTION) && (raw || !newfs)) { + prop == ZFS_PROP_ENCRYPTION) && raw) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption property '%s' cannot " + "be set or excluded for raw streams."), name); + ret = zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + /* incremental streams can only exclude encryption properties */ + if ((zfs_prop_encryption_key_param(prop) || + prop == ZFS_PROP_ENCRYPTION) && !newfs && + nvpair_type(nvp) != DATA_TYPE_BOOLEAN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "encryption property '%s' cannot " - "be set or excluded for raw or incremental " - "streams."), name); + "be set for incremental streams."), name); ret = zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } @@ -3669,10 +3692,12 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, */ if (nvlist_exists(origprops, name)) { nvlist_t *attrs; + char *source = NULL; attrs = fnvlist_lookup_nvlist(origprops, name); - if (strcmp(fnvlist_lookup_string(attrs, - ZPROP_SOURCE), ZPROP_SOURCE_VAL_RECVD) != 0) + if (nvlist_lookup_string(attrs, + ZPROP_SOURCE, &source) == 0 && + strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0) continue; } /* @@ -3992,11 +4017,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } } else { /* - * if the fs does not exist, look for it based on the - * fromsnap GUID + * If the fs does not exist, look for it based on the + * fromsnap GUID. */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive incremental stream")); + if (resuming) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot receive resume stream")); + } else { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot receive incremental stream")); + } (void) strcpy(name, destsnap); *strchr(name, '@') = '\0'; @@ -4093,7 +4125,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, /* * Raw sends can not be performed as an incremental on top - * of existing unencryppted datasets. zfs recv -F cant be + * of existing unencrypted datasets. zfs recv -F can't be * used to blow away an existing encrypted filesystem. This * is because it would require the dsl dir to point to the * new key (or lack of a key) and the old key at the same @@ -4210,34 +4242,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, goto out; } - /* - * It is invalid to receive a properties stream that was - * unencrypted on the send side as a child of an encrypted - * parent. Technically there is nothing preventing this, but - * it would mean that the encryption=off property which is - * locally set on the send side would not be received correctly. 
- * We can infer encryption=off if the stream is not raw and
- * properties were included since the send side will only ever
- * send the encryption property in a raw nvlist header. This
- * check will be avoided if the user specifically overrides
- * the encryption property on the command line.
- */
- if (!raw && rcvprops != NULL &&
- !nvlist_exists(cmdprops,
- zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
- uint64_t crypt;
-
- crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
-
- if (crypt != ZIO_CRYPT_OFF) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "parent '%s' must not be encrypted to "
- "receive unenecrypted property"), name);
- err = zfs_error(hdl, EZFS_BADPROP, errbuf);
- zfs_close(zhp);
- goto out;
- }
- }
zfs_close(zhp);
newfs = B_TRUE;
@@ -4253,6 +4257,21 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}
if (flags->dryrun) {
+ void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
+
+ /*
+ * We have read the DRR_BEGIN record, but we have
+ * not yet read the payload. For non-dryrun sends
+ * this will be done by the kernel, so we must
+ * emulate that here, before attempting to read
+ * more records.
+ */
+ err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
+ flags->byteswap, NULL);
+ free(buf);
+ if (err != 0)
+ goto out;
+
err = recv_skip(hdl, infd, flags->byteswap);
goto out;
}
@@ -4274,6 +4293,24 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
&oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
goto out;
+ /*
+ * When sending with properties (zfs send -p), the encryption property
+ * is not included because it is a SETONCE property and therefore
+ * treated as read only. However, we are always able to determine its
+ * value because raw sends will include it in the DRR_BEGIN payload
+ * and non-raw sends with properties are not allowed for encrypted
+ * datasets. Therefore, if this is a non-raw properties stream, we can
+ * infer that the value should be ZIO_CRYPT_OFF and manually add that
+ * to the received properties.
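(The comment's closing and the code it documents follow just below.) In isolation, the lazy-override pattern that code adds is a few lines of libnvpair; a compilable sketch, where the literal property name and the 0 standing in for ZIO_CRYPT_OFF are spelled out only for illustration (build with -lnvpair):

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *oxprops = NULL;

	/* allocate the override list only when something must be recorded */
	if (oxprops == NULL)
		oxprops = fnvlist_alloc();
	fnvlist_add_uint64(oxprops, "encryption", 0);	/* ZIO_CRYPT_OFF */

	nvlist_print(stdout, oxprops);
	fnvlist_free(oxprops);
	return (0);
}
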
+ */ + if (stream_wantsnewfs && !raw && rcvprops != NULL && + !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) { + if (oxprops == NULL) + oxprops = fnvlist_alloc(); + fnvlist_add_uint64(oxprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF); + } + err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable, raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags, @@ -4428,14 +4465,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '@'; break; case EINVAL: - if (flags->resumable) + if (flags->resumable) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "kernel modules must be upgraded to " "receive this stream.")); - if (embedded && !raw) + } else if (embedded && !raw) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incompatible embedded data stream " "feature with encrypted receive.")); + } (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 19bb57ad4378..4a9676668fb5 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -69,21 +69,21 @@ libzfs_error_init(int error) case ENXIO: return (dgettext(TEXT_DOMAIN, "The ZFS modules are not " "loaded.\nTry running '/sbin/modprobe zfs' as root " - "to load them.\n")); + "to load them.")); case ENOENT: return (dgettext(TEXT_DOMAIN, "/dev/zfs and /proc/self/mounts " "are required.\nTry running 'udevadm trigger' and 'mount " - "-t proc proc /proc' as root.\n")); + "-t proc proc /proc' as root.")); case ENOEXEC: return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be " "auto-loaded.\nTry running '/sbin/modprobe zfs' as " - "root to manually load them.\n")); + "root to manually load them.")); case EACCES: return (dgettext(TEXT_DOMAIN, "Permission denied the " - "ZFS utilities must be run as root.\n")); + "ZFS utilities must be run as root.")); default: return (dgettext(TEXT_DOMAIN, "Failed to initialize the " - "libzfs library.\n")); + "libzfs library.")); } } @@ -303,6 +303,8 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_NO_RESILVER_DEFER: return (dgettext(TEXT_DOMAIN, "this action requires the " "resilver_defer feature")); + case EZFS_EXPORT_IN_PROGRESS: + return (dgettext(TEXT_DOMAIN, "pool export in progress")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -598,6 +600,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_VDEV_TOO_BIG: zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap); break; + case ZFS_ERR_EXPORT_IN_PROGRESS: + zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap); + break; case ZFS_ERR_IOC_CMD_UNAVAIL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " "module does not support this operation. A reboot may " @@ -1134,7 +1139,7 @@ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) - len = 16 * 1024; + len = 256 * 1024; zc->zc_nvlist_dst_size = len; zc->zc_nvlist_dst = (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 99fc84d04614..eb332bc94e8c 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -52,7 +52,7 @@ * * - Thin Layer. libzfs_core is a thin layer, marshaling arguments * to/from the kernel ioctls. There is generally a 1:1 correspondence - * between libzfs_core functions and ioctls to /dev/zfs. + * between libzfs_core functions and ioctls to ZFS_DEV. 
* * - Clear Atomicity. Because libzfs_core functions are generally 1:1 * with kernel ioctls, and kernel ioctls are general atomic, each @@ -135,7 +135,7 @@ libzfs_core_init(void) { (void) pthread_mutex_lock(&g_lock); if (g_refcount == 0) { - g_fd = open("/dev/zfs", O_RDWR); + g_fd = open(ZFS_DEV, O_RDWR); if (g_fd < 0) { (void) pthread_mutex_unlock(&g_lock); return (errno); @@ -499,7 +499,7 @@ lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) * The snapshots must all be in the same pool. * The value is the name of the hold (string type). * - * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). + * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). * In this case, when the cleanup_fd is closed (including on process * termination), the holds will be released. If the system is shut down * uncleanly, the holds will be released when the pool is next opened diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 0f39e0d72bca..5d80f9e78cd8 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -47,7 +47,6 @@ * Emulation of kernel services in userland. */ -int aok; uint64_t physmem; vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; @@ -339,6 +338,13 @@ cv_wait(kcondvar_t *cv, kmutex_t *mp) mp->m_owner = pthread_self(); } +int +cv_wait_sig(kcondvar_t *cv, kmutex_t *mp) +{ + cv_wait(cv, mp); + return (1); +} + clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c index ad05d2239ae0..67bc209ceec9 100644 --- a/lib/libzpool/util.c +++ b/lib/libzpool/util.c @@ -223,7 +223,7 @@ pool_active(void *unused, const char *name, uint64_t guid, * Use ZFS_IOC_POOL_SYNC to confirm if a pool is active */ - fd = open("/dev/zfs", O_RDWR); + fd = open(ZFS_DEV, O_RDWR); if (fd < 0) return (-1); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index e82744383dc0..e84680a7976d 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1651,17 +1651,25 @@ zpool_open_func(void *arg) if (rn->rn_labelpaths) { char *path = NULL; char *devid = NULL; + char *env = NULL; rdsk_node_t *slice; avl_index_t where; + int timeout; int error; if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) return; + env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); + if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || + timeout < 0) { + timeout = DISK_LABEL_WAIT; + } + /* * Allow devlinks to stabilize so all paths are available. */ - zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT); + zpool_label_disk_wait(rn->rn_name, timeout); if (path != NULL) { slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); @@ -1793,7 +1801,7 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, char *dpath, *name; /* - * Seperate the directory part and last part of the + * Separate the directory part and last part of the * path. We do this so that we can get the realpath of * the directory. We don't get the realpath on the * whole path because if it's a symlink, we want the @@ -2080,8 +2088,8 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg) tpool_destroy(t); /* - * Process the cache filtering out any entries which are not - * for the specificed pool then adding matching label configs. + * Process the cache, filtering out any entries which are not + * for the specified pool then adding matching label configs. 
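(That comment closes just below.) Stepping back, the ZPOOL_IMPORT_UDEV_TIMEOUT_MS handling added earlier in zpool_open_func() is a small reusable pattern: accept an environment override, and fall back to the compiled-in default on any malformed or negative value. A user-space sketch — the 1000 ms default is a stand-in for the real DISK_LABEL_WAIT constant:

#include <stdio.h>
#include <stdlib.h>

#define DISK_LABEL_WAIT 1000	/* stand-in default, in milliseconds */

static int
import_udev_timeout_ms(void)
{
	const char *env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
	int timeout;

	/* reject unset, unparsable, or negative values quietly */
	if (env == NULL || sscanf(env, "%d", &timeout) != 1 || timeout < 0)
		timeout = DISK_LABEL_WAIT;
	return (timeout);
}

int
main(void)
{
	printf("waiting up to %d ms for device links\n",
	    import_udev_timeout_ms());
	return (0);
}
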
*/ cookie = NULL; while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) { diff --git a/man/man1/Makefile.am b/man/man1/Makefile.am index bd78be1452a8..2af917fa5c2e 100644 --- a/man/man1/Makefile.am +++ b/man/man1/Makefile.am @@ -1,4 +1,4 @@ -dist_man_MANS = zhack.1 ztest.1 raidz_test.1 +dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1 EXTRA_DIST = cstyle.1 install-data-local: diff --git a/man/man1/cstyle.1 b/man/man1/cstyle.1 index f2b637d4c36b..f77d534507a4 100644 --- a/man/man1/cstyle.1 +++ b/man/man1/cstyle.1 @@ -31,7 +31,7 @@ .IX "OS-Net build tools" "cstyle" "" "\fBcstyle\fP" .LP .I cstyle -inspects C source files (*.c and *.h) for common sylistic errors. It +inspects C source files (*.c and *.h) for common stylistic errors. It attempts to check for the cstyle documented in \fIhttp://www.cis.upenn.edu/~lee/06cse480/data/cstyle.ms.pdf\fP. Note that there is much in that document that diff --git a/man/man1/raidz_test.1 b/man/man1/raidz_test.1 index 90d858d5bb40..423177a1b839 100644 --- a/man/man1/raidz_test.1 +++ b/man/man1/raidz_test.1 @@ -25,7 +25,7 @@ .TH raidz_test 1 "2016" "ZFS on Linux" "User Commands" .SH NAME -\fBraidz_test\fR \- raidz implementation verification and bencmarking tool +\fBraidz_test\fR \- raidz implementation verification and benchmarking tool .SH SYNOPSIS .LP .BI "raidz_test " diff --git a/man/man1/ztest.1 b/man/man1/ztest.1 index b8cb0d45d92c..84e56c822d13 100644 --- a/man/man1/ztest.1 +++ b/man/man1/ztest.1 @@ -175,5 +175,5 @@ By default the stack size is limited to 256K. .BR "zfs (1)" "," .BR "zdb (1)" "," .SH "AUTHOR" -This manual page was transvered to asciidoc by Michael Gebetsroither +This manual page was transferred to asciidoc by Michael Gebetsroither from http://opensolaris.org/os/community/zfs/ztest/ diff --git a/man/man1/zvol_wait.1 b/man/man1/zvol_wait.1 new file mode 100644 index 000000000000..0366da5376d3 --- /dev/null +++ b/man/man1/zvol_wait.1 @@ -0,0 +1,21 @@ +.Dd July 5, 2019 +.Dt ZVOL_WAIT 1 SMM +.Os Linux +.Sh NAME +.Nm zvol_wait +.Nd Wait for ZFS volume links in +.Em /dev +to be created. +.Sh SYNOPSIS +.Nm +.Sh DESCRIPTION +When a ZFS pool is imported, ZFS will register each ZFS volume +(zvol) as a disk device with the system. As the disks are registered, +.Xr \fBudev 7\fR +will asynchronously create symlinks under +.Em /dev/zvol +using the zvol's name. +.Nm +will wait for all those symlinks to be created before returning. +.Sh SEE ALSO +.Xr \fBudev 7\fR diff --git a/man/man5/vdev_id.conf.5 b/man/man5/vdev_id.conf.5 index 5b7fbf0cad49..89c5ee961094 100644 --- a/man/man5/vdev_id.conf.5 +++ b/man/man5/vdev_id.conf.5 @@ -41,7 +41,7 @@ disk enclosure). .TP \fIenclosure_symlinks\fR Additionally create /dev/by-enclosure symlinks to the disk enclosure -sg devices using the naming scheme from from vdev_id.conf. +sg devices using the naming scheme from vdev_id.conf. \fIenclosure_symlinks\fR is only allowed for sas_direct mode. .TP \fIenclosure_symlinks_prefix\fR diff --git a/man/man5/zfs-events.5 b/man/man5/zfs-events.5 index 7e9bbedafdad..4a28be71e685 100644 --- a/man/man5/zfs-events.5 +++ b/man/man5/zfs-events.5 @@ -557,7 +557,7 @@ How many write errors that have been detected on the vdev. \fBvdev_cksum_errors\fR .ad .RS 12n -How many checkum errors that have been detected on the vdev. +How many checksum errors that have been detected on the vdev. .RE .sp @@ -858,7 +858,7 @@ such as IDE or parallel SCSI. .RS 12n If this field exists, it is an array of counters. 
Each entry counts bit
clears in a particular bit of a big-endian uint64 type. The first entry counts bits
-clears of the the high-order bit of the first byte, the 9th byte, etc, and the
+clears of the high-order bit of the first byte, the 9th byte, etc, and the
last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
etc. This information is useful for observing a stuck bit in a parallel data
path, such as IDE or parallel SCSI.
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 5bca12e06ea2..8d30e949f57e 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -104,6 +104,18 @@ to a log2 fraction of the target arc size.
Default value: \fB6\fR.
.RE
+.sp
+.ne 2
+.na
+\fBdmu_prefetch_max\fR (int)
+.ad
+.RS 12n
+Limit the amount we can prefetch with one call to this amount (in bytes).
+This helps to limit the amount of memory that can be used by prefetching.
+.sp
+Default value: \fB134,217,728\fR (128MB).
+.RE
+
.sp
.ne 2
.na
@@ -313,6 +325,40 @@ Enable use of the fragmentation metric in computing metaslab weights.
Use \fB1\fR for yes (default) and \fB0\fR for no.
.RE
+.sp
+.ne 2
+.na
+\fBmetaslab_df_max_search\fR (int)
+.ad
+.RS 12n
+Maximum distance to search forward from the last offset. Without this limit,
+fragmented pools can see >100,000 iterations and metaslab_block_picker()
+becomes the performance limiting factor on high-performance storage.

+With the default setting of 16MB, we typically see less than 500 iterations,
+even with very fragmented, ashift=9 pools. The maximum number of iterations
+possible is: \fBmetaslab_df_max_search / (2 * (1<<ashift))\fR.
+.sp
+Default value: \fB16,777,216\fR (16MB).
+.RE
diff --git a/man/man8/zdb.8 b/man/man8/zdb.8
@@ ... @@
-.Op Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags
+.Op Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [<lsize>/]<psize> Ns Op : Ns Ar flags
.Nm
.Fl S
.Op Fl AP
@@ -227,7 +227,7 @@ This option can be combined with
.Fl v
for increasing verbosity.
.It Xo
-.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags
+.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [<lsize>/]<psize> Ns Op : Ns Ar flags
.Xc
Read and display a block from the specified device. By default the block is
displayed as a hex dump, but see the description of the
.Fl r
flag, below.
The block is specified in terms of a colon-separated tuple
.Ar offset
.Pq the offset within the vdev
.Ar size
-.Pq the size of the block to read
-and, optionally,
+.Pq the physical size, or logical size / physical size
+of the block to read and, optionally,
.Ar flags
.Pq a set of flags, described below .
.Pp
.Bl -tag -compact -width "b offset"
.It Sy b Ar offset
Print block pointer
+.It Sy c
+Calculate and display checksums
.It Sy d
Decompress the block. Set environment variable
-.Nm ZBD_NO_ZLE
+.Nm ZDB_NO_ZLE
to skip zle when guessing.
.It Sy e
Byte swap the block
@@ -260,6 +262,8 @@ Dump gang block header
Dump indirect block
.It Sy r
Dump raw uninterpreted block data
+.It Sy v
+Verbose output for guessing compression algorithm
.El
.It Fl s
Report statistics on
diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in
index 79720601d62a..a696eb4617d3 100644
--- a/man/man8/zfs-mount-generator.8.in
+++ b/man/man8/zfs-mount-generator.8.in
@@ -26,7 +26,7 @@ information on ZFS mountpoints must be stored separately.
The output of the command .PP .RS 4 -zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand +zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand,encroot,keylocation .RE .PP for datasets that should be mounted by systemd, should be kept @@ -58,9 +58,9 @@ Then, enable the tracking ZEDLET: .RS 4 ln -s "@zfsexecdir@/zed.d/history_event-zfs-list-cacher.sh" "@sysconfdir@/zfs/zed.d" -systemctl enable zed.service +systemctl enable zfs-zed.service -systemctl restart zed.service +systemctl restart zfs-zed.service .RE .PP Force the running of the ZEDLET by setting canmount=on for at least one dataset in the pool: diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 8d7b0bbb6fba..496363642b9e 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -305,7 +305,8 @@ .Op Fl t Ar instruction-limit .Op Fl m Ar memory-limit .Ar pool script -.Op Ar arg1 No ... +.Op -- +.Ar arg1 No ... .Nm .Cm load-key .Op Fl nr @@ -1544,7 +1545,7 @@ This value represents the threshold block size for including small file blocks into the special allocation class. Blocks smaller than or equal to this value will be assigned to the special allocation class while greater blocks will be assigned to the regular class. Valid values are zero or a power of two -from 512B up to 128K. The default size is 0 which means no small file blocks +from 512B up to 1M. The default size is 0 which means no small file blocks will be allocated in the special class. .Pp Before setting this property, a special class vdev must be added to the @@ -1976,7 +1977,7 @@ If the property is set to .Sy on , the dataset is shared using the default options: .Pp -.Em sec=sys,rw,crossmnt,no_subtree_check,no_root_squash +.Em sec=sys,rw,crossmnt,no_subtree_check .Pp See .Xr exports 5 @@ -4469,7 +4470,8 @@ Display the path's inode change time as the first column of output. .Op Fl t Ar instruction-limit .Op Fl m Ar memory-limit .Ar pool script -.Op Ar arg1 No ... +.Op -- +.Ar arg1 No ... .Xc Executes .Ar script diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index bdad81149b86..adbb723aae72 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -2740,6 +2740,12 @@ Similar to the option in .Nm zpool import . .El +.Bl -tag -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS" +.It Ev ZPOOL_IMPORT_UDEV_TIMEOUT_MS +The maximum time in milliseconds that +.Nm zpool import +will wait for an expected device to be available. 
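Back in the zfs.8 hunk above, the special_small_blocks rule (zero, or a power of two from 512B up to 1M) reduces to a short check. A sketch, with the bounds read as inclusive:

#include <stdio.h>
#include <stdint.h>

/* zero, or a power of two between 512 and 1M inclusive */
static int
valid_special_small_blocks(uint64_t v)
{
	if (v == 0)
		return (1);
	return (v >= 512 && v <= (1ULL << 20) && (v & (v - 1)) == 0);
}

int
main(void)
{
	printf("%d %d %d\n",
	    valid_special_small_blocks(0),	/* 1 */
	    valid_special_small_blocks(4096),	/* 1 */
	    valid_special_small_blocks(3000));	/* 0 */
	return (0);
}
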
+.El .Bl -tag -width "ZPOOL_VDEV_NAME_GUID" .It Ev ZPOOL_VDEV_NAME_GUID Cause diff --git a/module/.gitignore b/module/.gitignore index 1ea8ef0bb811..45e5f9922235 100644 --- a/module/.gitignore +++ b/module/.gitignore @@ -5,6 +5,7 @@ *.dwo .*.cmd .*.d +*.mod /.cache.mk /.tmp_versions diff --git a/module/Makefile.in b/module/Makefile.in index 935bd2663062..ea8b8340d8bb 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -1,11 +1,11 @@ -subdir-m += avl -subdir-m += icp -subdir-m += lua -subdir-m += nvpair -subdir-m += spl -subdir-m += unicode -subdir-m += zcommon -subdir-m += zfs +obj-m += avl/ +obj-m += icp/ +obj-m += lua/ +obj-m += nvpair/ +obj-m += spl/ +obj-m += unicode/ +obj-m += zcommon/ +obj-m += zfs/ INSTALL_MOD_DIR ?= extra @@ -60,14 +60,15 @@ modules_install: modules_uninstall: @# Uninstall the kernel modules kmoddir=$(DESTDIR)$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@ - list='$(subdir-m)'; for subdir in $$list; do \ - $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$subdir; \ + list='$(obj-m)'; for objdir in $$list; do \ + $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \ done distdir: - list='$(subdir-m)'; for subdir in $$list; do \ - (cd @top_srcdir@/module && find $$subdir -name '*.c' -o -name '*.h' -o -name '*.S' |\ - xargs cp --parents -t $$distdir); \ + list='$(obj-m)'; for objdir in $$list; do \ + (cd @top_srcdir@/module && find $$objdir \ + -name '*.c' -o -name '*.h' -o -name '*.S' | \ + xargs -r cp --parents -t @abs_top_builddir@/module/$$distdir); \ done distclean maintainer-clean: clean diff --git a/module/avl/avl.c b/module/avl/avl.c index 736dcee84579..1d2843f0e716 100644 --- a/module/avl/avl.c +++ b/module/avl/avl.c @@ -159,7 +159,7 @@ avl_walk(avl_tree_t *tree, void *oldnode, int left) node = node->avl_child[right]) ; /* - * Otherwise, return thru left children as far as we can. + * Otherwise, return through left children as far as we can. */ } else { for (;;) { diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index e15050635741..fe15d76d16ad 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Initialize AES encryption and decryption key schedules. @@ -40,9 +41,9 @@ void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched) { - aes_impl_ops_t *ops = aes_impl_get_ops(); - aes_key_t *newbie = keysched; - uint_t keysize, i, j; + const aes_impl_ops_t *ops = aes_impl_get_ops(); + aes_key_t *newbie = keysched; + uint_t keysize, i, j; union { uint64_t ka64[4]; uint32_t ka32[8]; @@ -252,12 +253,17 @@ static size_t aes_supp_impl_cnt = 0; static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)]; /* - * Selects the aes operations for encrypt/decrypt/key setup + * Returns the AES operations for encrypt/decrypt/key setup. When a + * SIMD implementation is not allowed in the current context, then + * fallback to the fastest generic implementation. 
*/ -aes_impl_ops_t * -aes_impl_get_ops() +const aes_impl_ops_t * +aes_impl_get_ops(void) { - aes_impl_ops_t *ops = NULL; + if (!kfpu_allowed()) + return (&aes_generic_impl); + + const aes_impl_ops_t *ops = NULL; const uint32_t impl = AES_IMPL_READ(icp_aes_impl); switch (impl) { @@ -266,15 +272,13 @@ aes_impl_get_ops() ops = &aes_fastest_impl; break; case IMPL_CYCLE: - { + /* Cycle through supported implementations */ ASSERT(aes_impl_initialized); ASSERT3U(aes_supp_impl_cnt, >, 0); - /* Cycle through supported implementations */ static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt; ops = aes_supp_impl[idx]; - } - break; + break; default: ASSERT3U(impl, <, aes_supp_impl_cnt); ASSERT3U(aes_supp_impl_cnt, >, 0); @@ -288,13 +292,16 @@ aes_impl_get_ops() return (ops); } +/* + * Initialize all supported implementations. + */ void aes_impl_init(void) { aes_impl_ops_t *curr_impl; int i, c; - /* move supported impl into aes_supp_impls */ + /* Move supported implementations into aes_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) { curr_impl = (aes_impl_ops_t *)aes_all_impl[i]; @@ -303,16 +310,21 @@ aes_impl_init(void) } aes_supp_impl_cnt = c; - /* set fastest implementation. assume hardware accelerated is fastest */ + /* + * Set the fastest implementation given the assumption that the + * hardware accelerated version is the fastest. + */ #if defined(__x86_64) #if defined(HAVE_AES) - if (aes_aesni_impl.is_supported()) + if (aes_aesni_impl.is_supported()) { memcpy(&aes_fastest_impl, &aes_aesni_impl, sizeof (aes_fastest_impl)); - else + } else #endif + { memcpy(&aes_fastest_impl, &aes_x86_64_impl, sizeof (aes_fastest_impl)); + } #else memcpy(&aes_fastest_impl, &aes_generic_impl, sizeof (aes_fastest_impl)); diff --git a/module/icp/algs/aes/aes_impl_aesni.c b/module/icp/algs/aes/aes_impl_aesni.c index 97f7c3a4781b..222c176aabab 100644 --- a/module/icp/algs/aes/aes_impl_aesni.c +++ b/module/icp/algs/aes/aes_impl_aesni.c @@ -108,7 +108,7 @@ aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4], static boolean_t aes_aesni_will_work(void) { - return (zfs_aes_available()); + return (kfpu_allowed() && zfs_aes_available()); } const aes_impl_ops_t aes_aesni_impl = { diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c index fb41194f8175..f4075f503947 100644 --- a/module/icp/algs/modes/ccm.c +++ b/module/icp/algs/modes/ccm.c @@ -885,15 +885,13 @@ ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, ccm_ctx->ccm_flags |= CCM_MODE; } else { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (ccm_init(ccm_ctx, ccm_param->nonce, ccm_param->ulNonceSize, ccm_param->authData, ccm_param->ulAuthDataSize, block_size, encrypt_block, xor_block) != 0) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (!is_encrypt_init) { /* allocate buffer for storing decrypted plaintext */ @@ -903,7 +901,6 @@ ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, rv = CRYPTO_HOST_MEMORY; } } -out: return (rv); } diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 13bceef0f170..014e90ceff8f 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -29,6 +29,7 @@ #include #include #include +#include #define GHASH(c, d, t, o) \ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ @@ -46,7 +47,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, void (*copy_block)(uint8_t *, 
uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; @@ -168,7 +169,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint8_t *ghash, *macp = NULL; int i, rv; @@ -320,7 +321,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; size_t pt_len; size_t remainder; uint8_t *ghash; @@ -427,7 +428,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; uint8_t *cb; ulong_t remainder = iv_len; ulong_t processed = 0; @@ -481,7 +482,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - gcm_impl_ops_t *gops; + const gcm_impl_ops_t *gops; uint8_t *ghash, *datap, *authp; size_t remainder, processed; @@ -552,8 +553,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GCM_MODE; } else { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, @@ -561,7 +561,7 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } -out: + return (rv); } @@ -587,8 +587,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GMAC_MODE; } else { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - goto out; + return (CRYPTO_MECHANISM_PARAM_INVALID); } if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, @@ -596,7 +595,7 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } -out: + return (rv); } @@ -646,7 +645,7 @@ const gcm_impl_ops_t *gcm_all_impl[] = { /* Indicate that benchmark has been completed */ static boolean_t gcm_impl_initialized = B_FALSE; -/* Select aes implementation */ +/* Select GCM implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) @@ -660,12 +659,17 @@ static size_t gcm_supp_impl_cnt = 0; static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; /* - * Selects the gcm operation + * Returns the GCM operations for encrypt/decrypt/key setup. When a + * SIMD implementation is not allowed in the current context, then + * fallback to the fastest generic implementation. 
*/ -gcm_impl_ops_t * +const gcm_impl_ops_t * gcm_impl_get_ops() { - gcm_impl_ops_t *ops = NULL; + if (!kfpu_allowed()) + return (&gcm_generic_impl); + + const gcm_impl_ops_t *ops = NULL; const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); switch (impl) { @@ -674,15 +678,13 @@ gcm_impl_get_ops() ops = &gcm_fastest_impl; break; case IMPL_CYCLE: - { + /* Cycle through supported implementations */ ASSERT(gcm_impl_initialized); ASSERT3U(gcm_supp_impl_cnt, >, 0); - /* Cycle through supported implementations */ static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; ops = gcm_supp_impl[idx]; - } - break; + break; default: ASSERT3U(impl, <, gcm_supp_impl_cnt); ASSERT3U(gcm_supp_impl_cnt, >, 0); @@ -696,13 +698,16 @@ gcm_impl_get_ops() return (ops); } +/* + * Initialize all supported implementations. + */ void gcm_impl_init(void) { gcm_impl_ops_t *curr_impl; int i, c; - /* move supported impl into aes_supp_impls */ + /* Move supported implementations into gcm_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) { curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i]; @@ -711,15 +716,20 @@ gcm_impl_init(void) } gcm_supp_impl_cnt = c; - /* set fastest implementation. assume hardware accelerated is fastest */ + /* + * Set the fastest implementation given the assumption that the + * hardware accelerated version is the fastest. + */ #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) - if (gcm_pclmulqdq_impl.is_supported()) + if (gcm_pclmulqdq_impl.is_supported()) { memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, sizeof (gcm_fastest_impl)); - else + } else #endif + { memcpy(&gcm_fastest_impl, &gcm_generic_impl, sizeof (gcm_fastest_impl)); + } strcpy(gcm_fastest_impl.name, "fastest"); @@ -742,7 +752,7 @@ static const struct { * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update - * icp_aes_impl. + * icp_gcm_impl. * * @val Name of gcm implementation to use * @param Unused. 
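The selection logic documented in the last two files is the same for AES and GCM: prefer the benchmarked fastest implementation, optionally cycle through all of them for testing, and never hand out a SIMD variant when the FPU is unavailable. A reduced user-space model — the two-entry ops table, the kfpu_allowed() stub, and the "fastest" choice are stand-ins; only the dispatch mirrors the ICP code:

#include <stdio.h>
#include <stdint.h>

typedef struct impl_ops {
	const char *name;
	void (*fn)(void);
} impl_ops_t;

static void generic_fn(void) { puts("generic"); }
static void simd_fn(void) { puts("simd"); }

static const impl_ops_t generic_impl = { "generic", generic_fn };
static const impl_ops_t simd_impl = { "simd", simd_fn };

#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX - 1)

static const impl_ops_t *supp_impl[] = { &generic_impl, &simd_impl };
static const uint32_t supp_impl_cnt = 2;
static const impl_ops_t *fastest_impl = &simd_impl;

/* stub: in the kernel this checks whether SIMD is usable right now */
static int
kfpu_allowed(void)
{
	return (1);
}

static const impl_ops_t *
impl_get_ops(uint32_t impl)
{
	static uint32_t cycle_idx = 0;

	/* never hand out a SIMD implementation in a no-FPU context */
	if (!kfpu_allowed())
		return (&generic_impl);

	switch (impl) {
	case IMPL_FASTEST:
		return (fastest_impl);
	case IMPL_CYCLE:
		/* rotate through every supported implementation */
		return (supp_impl[++cycle_idx % supp_impl_cnt]);
	default:
		return (supp_impl[impl % supp_impl_cnt]);
	}
}

int
main(void)
{
	impl_get_ops(IMPL_FASTEST)->fn();	/* prints "simd" */
	impl_get_ops(IMPL_CYCLE)->fn();
	return (0);
}
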
diff --git a/module/icp/algs/modes/gcm_pclmulqdq.c b/module/icp/algs/modes/gcm_pclmulqdq.c index be00ba37b6a6..8a43ba33a6e5 100644 --- a/module/icp/algs/modes/gcm_pclmulqdq.c +++ b/module/icp/algs/modes/gcm_pclmulqdq.c @@ -52,7 +52,7 @@ gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res) static boolean_t gcm_pclmulqdq_will_work(void) { - return (zfs_pclmulqdq_available()); + return (kfpu_allowed() && zfs_pclmulqdq_available()); } const gcm_impl_ops_t gcm_pclmulqdq_impl = { diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c index 6d85cb7d9e98..7ba165a48511 100644 --- a/module/icp/algs/skein/skein_block.c +++ b/module/icp/algs/skein/skein_block.c @@ -159,7 +159,7 @@ Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr, ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); - /* loop thru it */ + /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) #endif { @@ -385,7 +385,7 @@ Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr, ts[r + (R)+2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); - /* loop thru it */ + /* loop through it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) #endif /* end of looped code definitions */ { @@ -667,7 +667,7 @@ Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr, ts[r + (R) + 2] = ts[r + (R) - 1]; \ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); - /* loop thru it */ + /* loop through it */ for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) #endif { diff --git a/module/icp/api/kcf_ctxops.c b/module/icp/api/kcf_ctxops.c index b9b9cb74e04f..21b0977d3634 100644 --- a/module/icp/api/kcf_ctxops.c +++ b/module/icp/api/kcf_ctxops.c @@ -63,7 +63,7 @@ * * Returns: * CRYPTO_SUCCESS when the context template is successfully created. - * CRYPTO_HOST_MEMEORY: mem alloc failure + * CRYPTO_HOST_MEMORY: mem alloc failure * CRYPTO_ARGUMENTS_BAD: NULL storage for the ctx template. * RYPTO_MECHANISM_INVALID: invalid mechanism 'mech'. */ @@ -123,7 +123,7 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key, * crypto_create_ctx_template() * * Description: - * Frees the inbedded crypto_spi_ctx_template_t, then the + * Frees the embedded crypto_spi_ctx_template_t, then the * kcf_ctx_template_t. * * Context: diff --git a/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl b/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl index a2c4adcbe6a5..92c9e196a318 100644 --- a/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl +++ b/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl @@ -101,7 +101,7 @@ * must display the following acknowledgement: * "This product includes cryptographic software written by * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library + * The word 'cryptographic' can be left out if the routines from the library * being used are not cryptographic related :-). * 4. If you include any Windows specific code (or a derivative thereof) from * the apps directory (application code) you must include an acknowledgement: diff --git a/module/icp/asm-x86_64/aes/aesopt.h b/module/icp/asm-x86_64/aes/aesopt.h index 6aa61db8275a..472111f96e59 100644 --- a/module/icp/asm-x86_64/aes/aesopt.h +++ b/module/icp/asm-x86_64/aes/aesopt.h @@ -327,7 +327,7 @@ extern "C" { * On some systems speed will be improved by aligning the AES large lookup * tables on particular boundaries. 
This define should be set to a power of * two giving the desired alignment. It can be left undefined if alignment - * is not needed. This option is specific to the Micrsoft VC++ compiler - + * is not needed. This option is specific to the Microsoft VC++ compiler - * it seems to sometimes cause trouble for the VC++ version 6 compiler. */ diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c index 741dae7a748e..2642b317d698 100644 --- a/module/icp/core/kcf_mech_tabs.c +++ b/module/icp/core/kcf_mech_tabs.c @@ -103,7 +103,7 @@ kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = { * Per-algorithm internal thresholds for the minimum input size of before * offloading to hardware provider. * Dispatching a crypto operation to a hardware provider entails paying the - * cost of an additional context switch. Measurments with Sun Accelerator 4000 + * cost of an additional context switch. Measurements with Sun Accelerator 4000 * shows that 512-byte jobs or smaller are better handled in software. * There is room for refinement here. * diff --git a/module/icp/core/kcf_sched.c b/module/icp/core/kcf_sched.c index da2346f7ec21..c8c2bbd42b9a 100644 --- a/module/icp/core/kcf_sched.c +++ b/module/icp/core/kcf_sched.c @@ -182,7 +182,7 @@ kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx, * reached, signal the creator thread for more threads. * * If the two conditions above are not met, we don't need to do - * any thing. The request will be picked up by one of the + * anything. The request will be picked up by one of the * worker threads when it becomes available. */ static int @@ -1182,7 +1182,7 @@ kcf_aop_done(kcf_areq_node_t *areq, int error) /* * Handle recoverable errors. This has to be done first - * before doing any thing else in this routine so that + * before doing anything else in this routine so that * we do not change the state of the request. */ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { @@ -1432,7 +1432,7 @@ crypto_cancel_req(crypto_req_id_t id) /* * There is no interface to remove an entry * once it is on the taskq. So, we do not do - * any thing for a hardware provider. + * anything for a hardware provider. */ break; default: @@ -1535,7 +1535,7 @@ kcf_misc_kstat_update(kstat_t *ksp, int rw) } /* - * Allocate and initiatize a kcf_dual_req, used for saving the arguments of + * Allocate and initialize a kcf_dual_req, used for saving the arguments of * a dual operation or an atomic operation that has to be internally * simulated with multiple single steps. * crq determines the memory allocation flags. @@ -1551,7 +1551,7 @@ kcf_alloc_req(crypto_call_req_t *crq) if (kcr == NULL) return (NULL); - /* Copy the whole crypto_call_req struct, as it isn't persistant */ + /* Copy the whole crypto_call_req struct, as it isn't persistent */ if (crq != NULL) kcr->kr_callreq = *crq; else @@ -1579,7 +1579,7 @@ kcf_next_req(void *next_req_arg, int status) kcf_provider_desc_t *pd = NULL; crypto_dual_data_t *ct = NULL; - /* Stop the processing if an error occured at this step */ + /* Stop the processing if an error occurred at this step */ if (error != CRYPTO_SUCCESS) { out: areq->an_reqarg = next_req->kr_callreq; diff --git a/module/icp/illumos-crypto.c b/module/icp/illumos-crypto.c index c2fcf1ff729c..3c5ef4393940 100644 --- a/module/icp/illumos-crypto.c +++ b/module/icp/illumos-crypto.c @@ -93,7 +93,7 @@ * will use the generic implementation. * * 7) Removing sha384 and sha512 code: The sha code was actually very - * wasy to port. 
However, the generic sha384 and sha512 code actually + * easy to port. However, the generic sha384 and sha512 code actually * exceeds the stack size on arm and powerpc architectures. In an effort * to remove warnings, this code was removed. * diff --git a/module/icp/include/aes/aes_impl.h b/module/icp/include/aes/aes_impl.h index 95cfddf9e0a4..a0b82ade4559 100644 --- a/module/icp/include/aes/aes_impl.h +++ b/module/icp/include/aes/aes_impl.h @@ -162,7 +162,7 @@ typedef enum aes_mech_type { #endif /* _AES_IMPL */ /* - * Methods used to define aes implementation + * Methods used to define AES implementation * * @aes_gen_f Key generation * @aes_enc_f Function encrypts one block @@ -201,9 +201,9 @@ extern const aes_impl_ops_t aes_aesni_impl; void aes_impl_init(void); /* - * Get selected aes implementation + * Returns optimal allowed AES implementation */ -struct aes_impl_ops *aes_impl_get_ops(void); +const struct aes_impl_ops *aes_impl_get_ops(void); #ifdef __cplusplus } diff --git a/module/icp/include/modes/gcm_impl.h b/module/icp/include/modes/gcm_impl.h index cbb904c059b7..28c8f63a7d46 100644 --- a/module/icp/include/modes/gcm_impl.h +++ b/module/icp/include/modes/gcm_impl.h @@ -37,12 +37,12 @@ extern "C" { #include /* - * Methods used to define gcm implementation + * Methods used to define GCM implementation * * @gcm_mul_f Perform carry-less multiplication * @gcm_will_work_f Function tests whether implementation will function */ -typedef void (*gcm_mul_f)(uint64_t *, uint64_t *, uint64_t *); +typedef void (*gcm_mul_f)(uint64_t *, uint64_t *, uint64_t *); typedef boolean_t (*gcm_will_work_f)(void); #define GCM_IMPL_NAME_MAX (16) @@ -64,9 +64,9 @@ extern const gcm_impl_ops_t gcm_pclmulqdq_impl; void gcm_impl_init(void); /* - * Get selected aes implementation + * Returns optimal allowed GCM implementation */ -struct gcm_impl_ops *gcm_impl_get_ops(void); +const struct gcm_impl_ops *gcm_impl_get_ops(void); #ifdef __cplusplus } diff --git a/module/icp/include/sys/crypto/impl.h b/module/icp/include/sys/crypto/impl.h index 258cb5fedcd0..0f37f3f63532 100644 --- a/module/icp/include/sys/crypto/impl.h +++ b/module/icp/include/sys/crypto/impl.h @@ -237,7 +237,7 @@ typedef struct kcf_provider_list { struct kcf_provider_desc *pl_provider; } kcf_provider_list_t; -/* atomic operations in linux implictly form a memory barrier */ +/* atomic operations in linux implicitly form a memory barrier */ #define membar_exit() /* diff --git a/module/icp/include/sys/crypto/sched_impl.h b/module/icp/include/sys/crypto/sched_impl.h index 32ffa774957b..85ea0ba1d092 100644 --- a/module/icp/include/sys/crypto/sched_impl.h +++ b/module/icp/include/sys/crypto/sched_impl.h @@ -381,7 +381,7 @@ typedef struct kcf_pool { /* * cv & lock for the condition where more threads need to be - * created. kp_user_lock also protects the three fileds above. + * created. kp_user_lock also protects the three fields above. */ kcondvar_t kp_user_cv; /* Creator cond. variable */ kmutex_t kp_user_lock; /* Creator lock */ @@ -448,13 +448,13 @@ typedef struct kcf_ntfy_elem { * The following values are based on the assumption that it would * take around eight cpus to load a hardware provider (This is true for * at least one product) and a kernel client may come from different - * low-priority interrupt levels. We will have CYRPTO_TASKQ_MIN number + * low-priority interrupt levels. We will have CRYPTO_TASKQ_MIN number * of cached taskq entries. The CRYPTO_TASKQ_MAX number is based on * a throughput of 1GB/s using 512-byte buffers. 
These are just * reasonable estimates and might need to change in future. */ #define CRYPTO_TASKQ_THREADS 8 -#define CYRPTO_TASKQ_MIN 64 +#define CRYPTO_TASKQ_MIN 64 #define CRYPTO_TASKQ_MAX 2 * 1024 * 1024 extern int crypto_taskq_threads; diff --git a/module/icp/include/sys/crypto/spi.h b/module/icp/include/sys/crypto/spi.h index 0aae9181adc7..2c62b5706651 100644 --- a/module/icp/include/sys/crypto/spi.h +++ b/module/icp/include/sys/crypto/spi.h @@ -699,7 +699,7 @@ typedef struct crypto_provider_info { /* * Provider status passed by a provider to crypto_provider_notification(9F) - * and returned by the provider_stauts(9E) entry point. + * and returned by the provider_status(9E) entry point. */ #define CRYPTO_PROVIDER_READY 0 #define CRYPTO_PROVIDER_BUSY 1 diff --git a/module/icp/io/aes.c b/module/icp/io/aes.c index 53b193693869..788bcef7d1e2 100644 --- a/module/icp/io/aes.c +++ b/module/icp/io/aes.c @@ -206,7 +206,7 @@ aes_mod_init(void) { int ret; - /* find fastest implementations and set any requested implementations */ + /* Determine the fastest available implementation. */ aes_impl_init(); gcm_impl_init(); diff --git a/module/icp/os/modhash.c b/module/icp/os/modhash.c index 497e84396665..5e216ed6a04a 100644 --- a/module/icp/os/modhash.c +++ b/module/icp/os/modhash.c @@ -48,7 +48,7 @@ * The number returned need _not_ be between 0 and nchains. The mod_hash * code will take care of doing that. The second argument (after the * key) to the hashing function is a void * that represents - * hash_alg_data-- this is provided so that the hashing algrorithm can + * hash_alg_data-- this is provided so that the hashing algorithm can * maintain some state across calls, or keep algorithm-specific * constants associated with the hash table. * diff --git a/module/icp/spi/kcf_spi.c b/module/icp/spi/kcf_spi.c index 0a6e38df8625..e438b58105b6 100644 --- a/module/icp/spi/kcf_spi.c +++ b/module/icp/spi/kcf_spi.c @@ -40,7 +40,7 @@ * minalloc and maxalloc values to be used for taskq_create(). */ int crypto_taskq_threads = CRYPTO_TASKQ_THREADS; -int crypto_taskq_minalloc = CYRPTO_TASKQ_MIN; +int crypto_taskq_minalloc = CRYPTO_TASKQ_MIN; int crypto_taskq_maxalloc = CRYPTO_TASKQ_MAX; static void remove_provider(kcf_provider_desc_t *); diff --git a/module/lua/ldebug.c b/module/lua/ldebug.c index 15fe91b0b768..32bb908cd505 100644 --- a/module/lua/ldebug.c +++ b/module/lua/ldebug.c @@ -324,6 +324,7 @@ static void kname (Proto *p, int pc, int c, const char **name) { if (ISK(c)) { /* is 'c' a constant? */ TValue *kvalue = &p->k[INDEXK(c)]; if (ttisstring(kvalue)) { /* literal constant? 
*/ + // cppcheck-suppress autoVariables *name = svalue(kvalue); /* it is its own name */ return; } diff --git a/module/lua/ldo.c b/module/lua/ldo.c index aca02b234770..d550cb5bfdba 100644 --- a/module/lua/ldo.c +++ b/module/lua/ldo.c @@ -61,7 +61,7 @@ #elif defined(__mips__) #define JMP_BUF_CNT 12 #elif defined(__s390x__) -#define JMP_BUF_CNT 9 +#define JMP_BUF_CNT 18 #else #define JMP_BUF_CNT 1 #endif @@ -168,6 +168,7 @@ int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) { struct lua_longjmp lj; lj.status = LUA_OK; lj.previous = L->errorJmp; /* chain new error handler */ + // cppcheck-suppress autoVariables L->errorJmp = &lj; LUAI_TRY(L, &lj, (*f)(L, ud); diff --git a/module/lua/llex.c b/module/lua/llex.c index 8760155d0546..50c301f599f1 100644 --- a/module/lua/llex.c +++ b/module/lua/llex.c @@ -431,9 +431,12 @@ static int llex (LexState *ls, SemInfo *seminfo) { if (sep >= 0) { read_long_string(ls, seminfo, sep); return TK_STRING; - } - else if (sep == -1) return '['; - else lexerror(ls, "invalid long string delimiter", TK_STRING); + } else if (sep == -1) { + return '['; + } else { + lexerror(ls, "invalid long string delimiter", TK_STRING); + break; + } } case '=': { next(ls); diff --git a/module/lua/llimits.h b/module/lua/llimits.h index eee8f0c2d538..2126a14648dc 100644 --- a/module/lua/llimits.h +++ b/module/lua/llimits.h @@ -98,7 +98,7 @@ typedef LUAI_UACNUMBER l_uacNumber; /* ** non-return type ** -** Supress noreturn attribute in kernel builds to avoid objtool check warnings +** Suppress noreturn attribute in kernel builds to avoid objtool check warnings */ #if defined(__GNUC__) && !defined(_KERNEL) #define l_noret void __attribute__((noreturn)) diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c index 5f6423ccce73..a47b94c48e08 100644 --- a/module/nvpair/nvpair.c +++ b/module/nvpair/nvpair.c @@ -1872,7 +1872,7 @@ nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...) * (given 'ret' is non-NULL). If 'sep' is specified then 'name' will penitrate * multiple levels of embedded nvlists, with 'sep' as the separator. As an * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or - * "a.d[3].e[1]". This matches the C syntax for array embed (for convience, + * "a.d[3].e[1]". This matches the C syntax for array embed (for convenience, * code also supports "a.d[3]e[1]" syntax). 
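(That comment continues just below.) The dotted-name traversal it describes can be done by hand with two libnvpair lookups, which is what the helper automates, plus the array-index handling. A sketch with throwaway names "a" and "b" (build with -lnvpair):

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *outer = fnvlist_alloc();
	nvlist_t *inner = fnvlist_alloc();

	fnvlist_add_uint64(inner, "b", 42);
	fnvlist_add_nvlist(outer, "a", inner);	/* embeds a copy of inner */

	/* two explicit steps for what the dotted name "a.b" denotes */
	nvlist_t *found = fnvlist_lookup_nvlist(outer, "a");
	uint64_t val = fnvlist_lookup_uint64(found, "b");

	printf("a.b = %llu\n", (unsigned long long)val);
	fnvlist_free(inner);
	fnvlist_free(outer);
	return (0);
}
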
* * If 'ip' is non-NULL and the last name component is an array, return the @@ -2558,7 +2558,7 @@ nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, #else int host_endian = 0; #endif /* _LITTLE_ENDIAN */ - nvs_header_t *nvh = (void *)buf; + nvs_header_t *nvh; if (buflen == NULL || nvl == NULL || (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) @@ -2577,6 +2577,7 @@ nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, if (buf == NULL || *buflen < sizeof (nvs_header_t)) return (EINVAL); + nvh = (void *)buf; nvh->nvh_encoding = encoding; nvh->nvh_endian = nvl_endian = host_endian; nvh->nvh_reserved1 = 0; @@ -2588,6 +2589,7 @@ nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, return (EINVAL); /* get method of encoding from first byte */ + nvh = (void *)buf; encoding = nvh->nvh_encoding; nvl_endian = nvh->nvh_endian; break; @@ -3105,7 +3107,7 @@ nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) * * An xdr packed nvlist is encoded as: * - * - encoding methode and host endian (4 bytes) + * - encoding method and host endian (4 bytes) * - nvl_version (4 bytes) * - nvl_nvflag (4 bytes) * @@ -3499,7 +3501,7 @@ nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) * the strings. These pointers are not encoded into the packed xdr buffer. * * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are - * of length 0, then each string is endcoded in xdr format as a single word. + * of length 0, then each string is encoded in xdr format as a single word. * Therefore when expanded to an nvpair there will be 2.25 word used for * each string. (a int64_t allocated for pointer usage, and a single char * for the null termination.) diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in index 3bcbf63cbc63..e16666aa94f3 100644 --- a/module/spl/Makefile.in +++ b/module/spl/Makefile.in @@ -16,10 +16,8 @@ $(MODULE)-objs += spl-kmem.o $(MODULE)-objs += spl-kmem-cache.o $(MODULE)-objs += spl-kobj.o $(MODULE)-objs += spl-kstat.o -$(MODULE)-objs += spl-mutex.o $(MODULE)-objs += spl-proc.o $(MODULE)-objs += spl-procfs-list.o -$(MODULE)-objs += spl-rwlock.o $(MODULE)-objs += spl-taskq.o $(MODULE)-objs += spl-thread.o $(MODULE)-objs += spl-tsd.o diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index a7a9d1db9a98..3cc33da6298a 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -26,8 +26,44 @@ #include #include +#include #include #include +#include + +#include + +#ifdef HAVE_SCHED_SIGNAL_HEADER +#include +#endif + +#define MAX_HRTIMEOUT_SLACK_US 1000 +unsigned int spl_schedule_hrtimeout_slack_us = 0; + +static int +param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp) +{ + unsigned long val; + int error; + + error = kstrtoul(buf, 0, &val); + if (error) + return (error); + + if (val > MAX_HRTIMEOUT_SLACK_US) + return (-EINVAL); + + error = param_set_uint(buf, kp); + if (error < 0) + return (error); + + return (0); +} + +module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack, + param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644); +MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us, + "schedule_hrtimeout_range() delta/slack value in us, default(0)"); void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) @@ -144,10 +180,21 @@ __cv_wait_io(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_io); -void +int +__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp) +{ + cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1); + + return 
(signal_pending(current) ? 0 : 1); +} +EXPORT_SYMBOL(__cv_wait_io_sig); + +int __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp) { cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0); + + return (signal_pending(current) ? 0 : 1); } EXPORT_SYMBOL(__cv_wait_sig); @@ -287,12 +334,13 @@ EXPORT_SYMBOL(__cv_timedwait_sig); */ static clock_t __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, - int state) + hrtime_t res, int state) { DEFINE_WAIT(wait); kmutex_t *m; hrtime_t time_left; ktime_t ktime_left; + u64 slack = 0; ASSERT(cvp); ASSERT(mp); @@ -319,13 +367,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, * race where 'cvp->cv_waiters > 0' but the list is empty. */ mutex_exit(mp); - /* - * Allow a 100 us range to give kernel an opportunity to coalesce - * interrupts - */ + ktime_left = ktime_set(0, time_left); - schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC, - HRTIMER_MODE_REL); + slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC), + MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC); + schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { @@ -352,19 +398,10 @@ static clock_t cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag, int state) { - if (res > 1) { - /* - * Align expiration to the specified resolution. - */ - if (flag & CALLOUT_FLAG_ROUNDUP) - tim += res - 1; - tim = (tim / res) * res; - } - if (!(flag & CALLOUT_FLAG_ABSOLUTE)) tim += gethrtime(); - return (__cv_timedwait_hires(cvp, mp, tim, state)); + return (__cv_timedwait_hires(cvp, mp, tim, res, state)); } clock_t @@ -394,8 +431,8 @@ __cv_signal(kcondvar_t *cvp) /* * All waiters are added with WQ_FLAG_EXCLUSIVE so only one - * waiter will be set runable with each call to wake_up(). - * Additionally wake_up() holds a spin_lock assoicated with + * waiter will be set runnable with each call to wake_up(). + * Additionally wake_up() holds a spin_lock associated with * the wait queue to ensure we don't race waking up processes. */ if (atomic_read(&cvp->cv_waiters) > 0) diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index cd2fa2020510..92f059a90549 100644 --- a/module/spl/spl-generic.c +++ b/module/spl/spl-generic.c @@ -79,7 +79,7 @@ EXPORT_SYMBOL(p0); * to generate words larger than 128 bits will paradoxically be limited to * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1` * 128-bit words and selecting the first will implicitly select the second. If - * a caller finds this behavior undesireable, random_get_bytes() should be used + * a caller finds this behavior undesirable, random_get_bytes() should be used * instead. * * XXX: Linux interrupt handlers that trigger within the critical section @@ -207,7 +207,7 @@ nlz64(uint64_t x) /* * Newer kernels have a div_u64() function but we define our own - * to simplify portibility between kernel versions. + * to simplify portability between kernel versions. */ static inline uint64_t __div_u64(uint64_t u, uint32_t v) @@ -273,7 +273,9 @@ int64_t __divdi3(int64_t u, int64_t v) { int64_t q, t; + // cppcheck-suppress shiftTooManyBitsSigned q = __udivdi3(abs64(u), abs64(v)); + // cppcheck-suppress shiftTooManyBitsSigned t = (u ^ v) >> 63; // If u, v have different return ((q ^ t) - t); // signs, negate q. 
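(__divdi3() closes with the brace just below.) Its branch-free sign fixup is easiest to verify in isolation: t is 0 when u and v share a sign and all-ones when they differ, so (q ^ t) - t conditionally negates q without a branch. A standalone check, carrying the same arithmetic-right-shift assumption the cppcheck suppressions above acknowledge, and ignoring the INT64_MIN edge case:

#include <stdio.h>
#include <stdint.h>

static int64_t
signed_div(int64_t u, int64_t v)
{
	uint64_t q = (uint64_t)(u < 0 ? -u : u) / (uint64_t)(v < 0 ? -v : v);
	int64_t t = (u ^ v) >> 63;	/* arithmetic shift: 0 or -1 */

	return (((int64_t)q ^ t) - t);	/* negate q iff signs differ */
}

int
main(void)
{
	printf("%lld\n", (long long)signed_div(-7, 2));		/* -3 */
	printf("%lld\n", (long long)signed_div(7, -2));		/* -3 */
	printf("%lld\n", (long long)signed_div(-7, -2));	/* 3 */
	return (0);
}
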
} @@ -649,7 +651,7 @@ static void __init spl_random_init(void) { uint64_t s[2]; - int i; + int i = 0; get_random_bytes(s, sizeof (s)); @@ -694,51 +696,41 @@ spl_init(void) if ((rc = spl_kvmem_init())) goto out1; - if ((rc = spl_mutex_init())) - goto out2; - - if ((rc = spl_rw_init())) - goto out3; - if ((rc = spl_tsd_init())) - goto out4; + goto out2; if ((rc = spl_taskq_init())) - goto out5; + goto out3; if ((rc = spl_kmem_cache_init())) - goto out6; + goto out4; if ((rc = spl_vn_init())) - goto out7; + goto out5; if ((rc = spl_proc_init())) - goto out8; + goto out6; if ((rc = spl_kstat_init())) - goto out9; + goto out7; if ((rc = spl_zlib_init())) - goto out10; + goto out8; return (rc); -out10: - spl_kstat_fini(); -out9: - spl_proc_fini(); out8: - spl_vn_fini(); + spl_kstat_fini(); out7: - spl_kmem_cache_fini(); + spl_proc_fini(); out6: - spl_taskq_fini(); + spl_vn_fini(); out5: - spl_tsd_fini(); + spl_kmem_cache_fini(); out4: - spl_rw_fini(); + spl_taskq_fini(); out3: - spl_mutex_fini(); + spl_tsd_fini(); out2: spl_kvmem_fini(); out1: @@ -755,8 +747,6 @@ spl_fini(void) spl_kmem_cache_fini(); spl_taskq_fini(); spl_tsd_fini(); - spl_rw_fini(); - spl_mutex_fini(); spl_kvmem_fini(); } diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index 44e112cccbd9..7baf56de6f93 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -185,7 +185,7 @@ MODULE_PARM_DESC(spl_kmem_cache_kmem_threads, struct list_head spl_kmem_cache_list; /* List of caches */ struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ -taskq_t *spl_kmem_cache_taskq; /* Task queue for ageing / reclaim */ +taskq_t *spl_kmem_cache_taskq; /* Task queue for aging / reclaim */ static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj); @@ -312,7 +312,7 @@ static spl_kmem_slab_t * spl_slab_alloc(spl_kmem_cache_t *skc, int flags) { spl_kmem_slab_t *sks; - spl_kmem_obj_t *sko, *n; + spl_kmem_obj_t *sko; void *base, *obj; uint32_t obj_size, offslab_size = 0; int i, rc = 0; @@ -356,6 +356,7 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags) out: if (rc) { + spl_kmem_obj_t *n = NULL; if (skc->skc_flags & KMC_OFFSLAB) list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { @@ -405,8 +406,8 @@ spl_slab_free(spl_kmem_slab_t *sks, static void spl_slab_reclaim(spl_kmem_cache_t *skc) { - spl_kmem_slab_t *sks, *m; - spl_kmem_obj_t *sko, *n; + spl_kmem_slab_t *sks = NULL, *m = NULL; + spl_kmem_obj_t *sko = NULL, *n = NULL; LIST_HEAD(sks_list); LIST_HEAD(sko_list); uint32_t size = 0; @@ -802,7 +803,7 @@ spl_magazine_free(spl_kmem_magazine_t *skm) static int spl_magazine_create(spl_kmem_cache_t *skc) { - int i; + int i = 0; if (skc->skc_flags & KMC_NOMAGAZINE) return (0); @@ -833,7 +834,7 @@ static void spl_magazine_destroy(spl_kmem_cache_t *skc) { spl_kmem_magazine_t *skm; - int i; + int i = 0; if (skc->skc_flags & KMC_NOMAGAZINE) return; @@ -862,11 +863,11 @@ spl_magazine_destroy(spl_kmem_cache_t *skc) * KMC_VMEM Force SPL vmem backed cache * KMC_SLAB Force Linux slab backed cache * KMC_OFFSLAB Locate objects off the slab - * KMC_NOTOUCH unsupported - * KMC_NODEBUG unsupported - * KMC_NOHASH unsupported - * KMC_QCACHE unsupported - * KMC_NOMAGAZINE unsupported + * KMC_NOTOUCH Disable cache object aging (unsupported) + * KMC_NODEBUG Disable debugging (unsupported) + * KMC_NOHASH Disable hashing (unsupported) + * KMC_QCACHE Disable qcache (unsupported) + * KMC_NOMAGAZINE Enabled for kmem/vmem, Disabled for Linux slab */ spl_kmem_cache_t * spl_kmem_cache_create(char *name, size_t size, 
size_t align, @@ -995,7 +996,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, #if defined(SLAB_USERCOPY) /* * Required for PAX-enabled kernels if the slab is to be - * used for coping between user and kernel space. + * used for copying between user and kernel space. */ slabflags |= SLAB_USERCOPY; #endif @@ -1453,6 +1454,17 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) obj = kmem_cache_alloc(slc, kmem_flags_convert(flags)); } while ((obj == NULL) && !(flags & KM_NOSLEEP)); + if (obj != NULL) { + /* + * Even though we leave everything up to the + * underlying cache we still keep track of + * how many objects we've allocated in it for + * better debuggability. + */ + spin_lock(&skc->skc_lock); + skc->skc_obj_alloc++; + spin_unlock(&skc->skc_lock); + } goto ret; } @@ -1526,6 +1538,9 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) */ if (skc->skc_flags & KMC_SLAB) { kmem_cache_free(skc->skc_linux_cache, obj); + spin_lock(&skc->skc_lock); + skc->skc_obj_alloc--; + spin_unlock(&skc->skc_lock); return; } @@ -1603,7 +1618,7 @@ static spl_shrinker_t __spl_kmem_cache_generic_shrinker(struct shrinker *shrink, struct shrink_control *sc) { - spl_kmem_cache_t *skc; + spl_kmem_cache_t *skc = NULL; int alloc = 0; /* diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 1fdb61e6fce1..cee69ad4346a 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -180,7 +180,8 @@ spl_kmem_alloc_impl(size_t size, int flags, int node) */ if ((size > spl_kmem_alloc_max) || use_vmem) { if (flags & KM_VMEM) { - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, + PAGE_KERNEL); } else { return (NULL); } @@ -302,7 +303,7 @@ kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, const void *addr) { struct hlist_head *head; - struct hlist_node *node; + struct hlist_node *node = NULL; struct kmem_debug *p; unsigned long flags; @@ -499,7 +500,7 @@ static void spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) { unsigned long flags; - kmem_debug_t *kd; + kmem_debug_t *kd = NULL; char str[17]; spin_lock_irqsave(lock, flags); diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c index 1f67bf157f06..c97b6d6cbcb7 100644 --- a/module/spl/spl-kstat.c +++ b/module/spl/spl-kstat.c @@ -431,7 +431,7 @@ static struct seq_operations kstat_seq_ops = { static kstat_module_t * kstat_find_module(char *name) { - kstat_module_t *module; + kstat_module_t *module = NULL; list_for_each_entry(module, &kstat_module_list, ksm_module_list) { if (strncmp(name, module->ksm_name, KSTAT_STRLEN) == 0) @@ -624,7 +624,7 @@ static int kstat_detect_collision(kstat_proc_entry_t *kpep) { kstat_module_t *module; - kstat_proc_entry_t *tmp; + kstat_proc_entry_t *tmp = NULL; char *parent; char *cp; @@ -659,7 +659,7 @@ kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, const struct file_operations *file_ops, void *data) { kstat_module_t *module; - kstat_proc_entry_t *tmp; + kstat_proc_entry_t *tmp = NULL; ASSERT(kpep); diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c deleted file mode 100644 index ba818862b679..000000000000 --- a/module/spl/spl-mutex.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. 
- * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Mutex Implementation. - */ - -#include - -int spl_mutex_init(void) { return 0; } -void spl_mutex_fini(void) { } diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c index a75bcc2145bd..c0c13913cdff 100644 --- a/module/spl/spl-proc.c +++ b/module/spl/spl-proc.c @@ -144,7 +144,7 @@ proc_doslab(struct ctl_table *table, int write, int rc = 0; unsigned long min = 0, max = ~0, val = 0, mask; spl_ctl_table dummy = *table; - spl_kmem_cache_t *skc; + spl_kmem_cache_t *skc = NULL; dummy.data = &val; dummy.proc_handler = &proc_dointvec; @@ -249,7 +249,7 @@ static int taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag) { taskq_t *tq = p; - taskq_thread_t *tqt; + taskq_thread_t *tqt = NULL; spl_wait_queue_entry_t *wq; struct task_struct *tsk; taskq_ent_t *tqe; @@ -437,11 +437,29 @@ slab_seq_show(struct seq_file *f, void *p) ASSERT(skc->skc_magic == SKC_MAGIC); - /* - * Backed by Linux slab see /proc/slabinfo. - */ - if (skc->skc_flags & KMC_SLAB) + if (skc->skc_flags & KMC_SLAB) { + /* + * This cache is backed by a generic Linux kmem cache which + * has its own accounting. For these caches we only track + * the number of active allocated objects that exist within + * the underlying Linux slabs. For the overall statistics of + * the underlying Linux cache please refer to /proc/slabinfo. + */ + spin_lock(&skc->skc_lock); + seq_printf(f, "%-36s ", skc->skc_name); + seq_printf(f, "0x%05lx %9s %9lu %8s %8u " + "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n", + (long unsigned)skc->skc_flags, + "-", + (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc), + "-", + (unsigned)skc->skc_obj_size, + "-", "-", "-", "-", + (long unsigned)skc->skc_obj_alloc, + "-", "-", "-", "-"); + spin_unlock(&skc->skc_lock); return (0); + } spin_lock(&skc->skc_lock); seq_printf(f, "%-36s ", skc->skc_name); @@ -461,9 +479,7 @@ slab_seq_show(struct seq_file *f, void *p) (long unsigned)skc->skc_obj_deadlock, (long unsigned)skc->skc_obj_emergency, (long unsigned)skc->skc_obj_emergency_max); - spin_unlock(&skc->skc_lock); - return (0); } diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c deleted file mode 100644 index 86727ed1957c..000000000000 --- a/module/spl/spl-rwlock.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Reader/Writer Lock Implementation. - */ - -#include -#include - -#if defined(CONFIG_PREEMPT_RT_FULL) - -#include -#define RT_MUTEX_OWNER_MASKALL 1UL - -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ -#if defined(READER_BIAS) && defined(WRITER_BIAS) - /* - * After the 4.9.20-rt16 kernel the realtime patch series lifted the - * single reader restriction. While this could be accommodated by - * adding additional compatibility code assume the rwsem can never - * be upgraded. All caller must already cleanly handle this case. - */ - return (0); -#else - ASSERT((struct task_struct *) - ((unsigned long)rwsem->lock.owner & ~RT_MUTEX_OWNER_MASKALL) == - current); - - /* - * Prior to 4.9.20-rt16 kernel the realtime patch series, rwsem is - * implemented as a single mutex held by readers and writers alike. - * However, this implementation would prevent a thread from taking - * a read lock twice, as the mutex would already be locked on - * the second attempt. Therefore the implementation allows a - * single thread to take a rwsem as read lock multiple times - * tracking that nesting as read_depth counter. - */ - if (rwsem->read_depth <= 1) { - /* - * In case, the current thread has not taken the lock - * more than once as read lock, we can allow an - * upgrade to a write lock. rwsem_rt.h implements - * write locks as read_depth == 0. - */ - rwsem->read_depth = 0; - return (1); - } - return (0); -#endif -} -#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - int ret = 0; - unsigned long flags; - spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags); - if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE && - list_empty(&rwsem->wait_list)) { - ret = 1; - RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE; - } - spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags); - return (ret); -} -#elif defined(RWSEM_ACTIVE_MASK) -#if defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - long val; - val = atomic_long_cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, - SPL_RWSEM_SINGLE_WRITER_VALUE); - return (val == SPL_RWSEM_SINGLE_READER_VALUE); -} -#else -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - typeof(rwsem->count) val; - val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, - SPL_RWSEM_SINGLE_WRITER_VALUE); - return (val == SPL_RWSEM_SINGLE_READER_VALUE); -} -#endif -#else -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - return (0); -} -#endif - -int -rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - if (__rwsem_tryupgrade(rwsem)) { - rwsem_release(&rwsem->dep_map, 1, _RET_IP_); - rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - rwsem->owner = current; -#endif - return (1); - } - return (0); -} -EXPORT_SYMBOL(rwsem_tryupgrade); - -int spl_rw_init(void) { return 0; } -void spl_rw_fini(void) { } diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c index a39f94e4cc20..a65c95615db0 100644 --- a/module/spl/spl-taskq.c +++ b/module/spl/spl-taskq.c @@ -82,7 +82,7 @@ task_km_flags(uint_t flags) static int 
taskq_find_by_name(const char *name) { - struct list_head *tql; + struct list_head *tql = NULL; taskq_t *tq; list_for_each_prev(tql, &tq_list) { @@ -211,7 +211,7 @@ task_expire_impl(taskq_ent_t *t) { taskq_ent_t *w; taskq_t *tq = t->tqent_taskq; - struct list_head *l; + struct list_head *l = NULL; unsigned long flags; spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); @@ -298,7 +298,7 @@ static void taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt) { taskq_thread_t *w; - struct list_head *l; + struct list_head *l = NULL; ASSERT(tq); ASSERT(tqt); @@ -321,7 +321,7 @@ taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt) static taskq_ent_t * taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id) { - struct list_head *l; + struct list_head *l = NULL; taskq_ent_t *t; list_for_each(l, lh) { @@ -347,7 +347,7 @@ static taskq_ent_t * taskq_find(taskq_t *tq, taskqid_t id) { taskq_thread_t *tqt; - struct list_head *l; + struct list_head *l = NULL; taskq_ent_t *t; t = taskq_find_list(tq, &tq->tq_delay_list, id); @@ -1198,7 +1198,7 @@ param_set_taskq_kick(const char *val, struct kernel_param *kp) #endif { int ret; - taskq_t *tq; + taskq_t *tq = NULL; taskq_ent_t *t; unsigned long flags; diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c index d441ad65f317..0352a31ea835 100644 --- a/module/spl/spl-thread.c +++ b/module/spl/spl-thread.c @@ -153,8 +153,9 @@ spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...) if (PTR_ERR(tsk) == -ENOMEM) continue; return (NULL); - } else + } else { return (tsk); + } } while (1); } EXPORT_SYMBOL(spl_kthread_create); diff --git a/module/spl/spl-tsd.c b/module/spl/spl-tsd.c index 4c800292ae77..b955ed65470f 100644 --- a/module/spl/spl-tsd.c +++ b/module/spl/spl-tsd.c @@ -42,7 +42,7 @@ * type is entry is called a 'key' entry and it is added to the hash during * tsd_create(). It is used to store the address of the destructor function * and it is used as an anchor point. All tsd entries which use the same - * key will be linked to this entry. This is used during tsd_destory() to + * key will be linked to this entry. This is used during tsd_destroy() to * quickly call the destructor function for all tsd associated with the key. * The 'key' entry may be looked up with tsd_hash_search() by passing the * key you wish to lookup and DTOR_PID constant as the pid. @@ -98,7 +98,7 @@ static tsd_hash_table_t *tsd_hash_table = NULL; static tsd_hash_entry_t * tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid) { - struct hlist_node *node; + struct hlist_node *node = NULL; tsd_hash_entry_t *entry; tsd_hash_bin_t *bin; ulong_t hash; @@ -269,7 +269,7 @@ tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor) * @table: hash table * @pid: search pid * - * For every process these is a single entry in the hash which is used + * For every process there is a single entry in the hash which is used * as anchor. All other thread specific entries for this process are * linked to this anchor via the 'he_pid_list' list head. 
*/ diff --git a/module/spl/spl-vmem.c b/module/spl/spl-vmem.c index e1a84a9117bf..a2630ecdd189 100644 --- a/module/spl/spl-vmem.c +++ b/module/spl/spl-vmem.c @@ -50,7 +50,7 @@ EXPORT_SYMBOL(zio_arena); size_t vmem_size(vmem_t *vmp, int typemask) { - spl_kmem_cache_t *skc; + spl_kmem_cache_t *skc = NULL; size_t alloc = VMEM_FLOOR_SIZE; if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE)) diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c index ef5f60540444..032bd1aba9c0 100644 --- a/module/spl/spl-vnode.c +++ b/module/spl/spl-vnode.c @@ -489,7 +489,7 @@ EXPORT_SYMBOL(vn_space); static file_t * file_find(int fd, struct task_struct *task) { - file_t *fp; + file_t *fp = NULL; list_for_each_entry(fp, &vn_file_list, f_list) { if (fd == fp->f_fd && fp->f_task == task) { @@ -641,68 +641,6 @@ vn_areleasef(int fd, uf_info_t *fip) } /* releasef() */ EXPORT_SYMBOL(areleasef); - -static void -#ifdef HAVE_SET_FS_PWD_WITH_CONST -vn_set_fs_pwd(struct fs_struct *fs, const struct path *path) -#else -vn_set_fs_pwd(struct fs_struct *fs, struct path *path) -#endif /* HAVE_SET_FS_PWD_WITH_CONST */ -{ - struct path old_pwd; - -#ifdef HAVE_FS_STRUCT_SPINLOCK - spin_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - spin_unlock(&fs->lock); -#else - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); -#endif /* HAVE_FS_STRUCT_SPINLOCK */ - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -int -vn_set_pwd(const char *filename) -{ - struct path path; - mm_segment_t saved_fs; - int rc; - - /* - * user_path_dir() and __user_walk() both expect 'filename' to be - * a user space address so we must briefly increase the data segment - * size to ensure strncpy_from_user() does not fail with -EFAULT. 
- */ - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = user_path_dir(filename, &path); - if (rc) - goto out; - - rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); - if (rc) - goto dput_and_out; - - vn_set_fs_pwd(current->fs, &path); - -dput_and_out: - path_put(&path); -out: - set_fs(saved_fs); - - return (-rc); -} /* vn_set_pwd() */ -EXPORT_SYMBOL(vn_set_pwd); - static int vn_cache_constructor(void *buf, void *cdrarg, int kmflags) { @@ -760,7 +698,7 @@ spl_vn_init(void) void spl_vn_fini(void) { - file_t *fp, *next_fp; + file_t *fp = NULL, *next_fp = NULL; int leaked = 0; spin_lock(&vn_file_lock); diff --git a/module/spl/spl-zlib.c b/module/spl/spl-zlib.c index 229e6a44b0bc..62423343c1bb 100644 --- a/module/spl/spl-zlib.c +++ b/module/spl/spl-zlib.c @@ -202,7 +202,7 @@ spl_zlib_init(void) zlib_workspace_cache = kmem_cache_create( "spl_zlib_workspace_cache", size, 0, NULL, NULL, NULL, NULL, NULL, - KMC_VMEM | KMC_NOEMERGENCY); + KMC_VMEM); if (!zlib_workspace_cache) return (1); diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index dc0c1161f8b6..9f74f0fbd266 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -485,7 +485,7 @@ zpool_feature_init(void) zfeature_register(SPA_FEATURE_RESILVER_DEFER, "com.datto:resilver_defer", "resilver_defer", - "Support for defering new resilvers when one is already running.", + "Support for deferring new resilvers when one is already running.", ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL); } diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c index 5daa6907c5d0..a3ff7d8e6991 100644 --- a/module/zcommon/zfs_comutil.c +++ b/module/zcommon/zfs_comutil.c @@ -64,6 +64,33 @@ zfs_allocatable_devs(nvlist_t *nv) return (B_FALSE); } +/* + * Are there special vdevs? 
+ */
+boolean_t
+zfs_special_devs(nvlist_t *nv)
+{
+	char *bias;
+	uint_t c;
+	nvlist_t **child;
+	uint_t children;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0) {
+		return (B_FALSE);
+	}
+	for (c = 0; c < children; c++) {
+		if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS,
+		    &bias) == 0) {
+			if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0 ||
+			    strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0) {
+				return (B_TRUE);
+			}
+		}
+	}
+	return (B_FALSE);
+}
+
 void
 zpool_get_load_policy(nvlist_t *nvl, zpool_load_policy_t *zlpp)
 {
@@ -223,6 +250,7 @@ zfs_dataset_name_hidden(const char *name)
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_allocatable_devs);
+EXPORT_SYMBOL(zfs_special_devs);
 EXPORT_SYMBOL(zpool_get_load_policy);
 EXPORT_SYMBOL(zfs_zpl_version_map);
 EXPORT_SYMBOL(zfs_spa_version_map);
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c
index 5a991ba6073a..4c9db441b536 100644
--- a/module/zcommon/zfs_fletcher.c
+++ b/module/zcommon/zfs_fletcher.c
@@ -140,6 +140,7 @@
 #include
 #include
 #include
+#include
 
 #define	FLETCHER_MIN_SIMD_SIZE	64
 
@@ -205,21 +206,19 @@ static struct fletcher_4_impl_selector {
 	const char *fis_name;
 	uint32_t fis_sel;
 } fletcher_4_impl_selectors[] = {
-#if !defined(_KERNEL)
 	{ "cycle",	IMPL_CYCLE },
-#endif
 	{ "fastest",	IMPL_FASTEST },
 	{ "scalar",	IMPL_SCALAR }
 };
 
 #if defined(_KERNEL)
 static kstat_t *fletcher_4_kstat;
-#endif
 
 static struct fletcher_4_kstat {
 	uint64_t native;
 	uint64_t byteswap;
 } fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
+#endif
 
 /* Indicate that benchmark has been completed */
 static boolean_t fletcher_4_initialized = B_FALSE;
@@ -408,32 +407,36 @@ fletcher_4_impl_set(const char *val)
 	return (err);
 }
 
+/*
+ * Returns the Fletcher 4 operations for checksums. When a SIMD
+ * implementation is not allowed in the current context, then fall back
+ * to the fastest generic implementation.
+ */ static inline const fletcher_4_ops_t * fletcher_4_impl_get(void) { - fletcher_4_ops_t *ops = NULL; - const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); + if (!kfpu_allowed()) + return (&fletcher_4_superscalar4_ops); + + const fletcher_4_ops_t *ops = NULL; + uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); switch (impl) { case IMPL_FASTEST: ASSERT(fletcher_4_initialized); ops = &fletcher_4_fastest_impl; break; -#if !defined(_KERNEL) - case IMPL_CYCLE: { + case IMPL_CYCLE: + /* Cycle through supported implementations */ ASSERT(fletcher_4_initialized); ASSERT3U(fletcher_4_supp_impls_cnt, >, 0); - static uint32_t cycle_count = 0; uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt; ops = fletcher_4_supp_impls[idx]; - } - break; -#endif + break; default: ASSERT3U(fletcher_4_supp_impls_cnt, >, 0); ASSERT3U(impl, <, fletcher_4_supp_impls_cnt); - ops = fletcher_4_supp_impls[impl]; break; } @@ -592,8 +595,9 @@ fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) } #if defined(_KERNEL) -/* Fletcher 4 kstats */ - +/* + * Fletcher 4 kstats + */ static int fletcher_4_kstat_headers(char *buf, size_t size) { @@ -658,6 +662,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n) typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *, zio_cksum_t *); +#if defined(_KERNEL) static void fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) { @@ -669,7 +674,6 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) zio_cksum_t zc; uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen); - fletcher_checksum_func_t *fletcher_4_test = native ? fletcher_4_native : fletcher_4_byteswap; @@ -716,16 +720,18 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) /* restore original selection */ atomic_swap_32(&fletcher_4_impl_chosen, sel_save); } +#endif /* _KERNEL */ -void -fletcher_4_init(void) +/* + * Initialize and benchmark all supported implementations. + */ +static void +fletcher_4_benchmark(void) { - static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */ fletcher_4_ops_t *curr_impl; - char *databuf; int i, c; - /* move supported impl into fletcher_4_supp_impls */ + /* Move supported implementations into fletcher_4_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) { curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i]; @@ -735,19 +741,10 @@ fletcher_4_init(void) membar_producer(); /* complete fletcher_4_supp_impls[] init */ fletcher_4_supp_impls_cnt = c; /* number of supported impl */ -#if !defined(_KERNEL) - /* Skip benchmarking and use last implementation as fastest */ - memcpy(&fletcher_4_fastest_impl, - fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1], - sizeof (fletcher_4_fastest_impl)); - fletcher_4_fastest_impl.name = "fastest"; - membar_producer(); +#if defined(_KERNEL) + static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */ + char *databuf = vmem_alloc(data_size, KM_SLEEP); - fletcher_4_initialized = B_TRUE; - return; -#endif - /* Benchmark all supported implementations */ - databuf = vmem_alloc(data_size, KM_SLEEP); for (i = 0; i < data_size / sizeof (uint64_t); i++) ((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */ @@ -755,9 +752,28 @@ fletcher_4_init(void) fletcher_4_benchmark_impl(B_TRUE, databuf, data_size); vmem_free(databuf, data_size); +#else + /* + * Skip the benchmark in user space to avoid impacting libzpool + * consumers (zdb, zhack, zinject, ztest). 
The last implementation + * is assumed to be the fastest and used by default. + */ + memcpy(&fletcher_4_fastest_impl, + fletcher_4_supp_impls[fletcher_4_supp_impls_cnt - 1], + sizeof (fletcher_4_fastest_impl)); + fletcher_4_fastest_impl.name = "fastest"; + membar_producer(); +#endif /* _KERNEL */ +} + +void +fletcher_4_init(void) +{ + /* Determine the fastest available implementation. */ + fletcher_4_benchmark(); #if defined(_KERNEL) - /* install kstats for all implementations */ + /* Install kstats for all implementations */ fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); if (fletcher_4_kstat != NULL) { diff --git a/module/zcommon/zfs_fletcher_aarch64_neon.c b/module/zcommon/zfs_fletcher_aarch64_neon.c index bd2db2b20fe2..3b3c1b52b804 100644 --- a/module/zcommon/zfs_fletcher_aarch64_neon.c +++ b/module/zcommon/zfs_fletcher_aarch64_neon.c @@ -198,7 +198,7 @@ unsigned char SRC __attribute__((vector_size(16))); static boolean_t fletcher_4_aarch64_neon_valid(void) { - return (B_TRUE); + return (kfpu_allowed()); } const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = { diff --git a/module/zcommon/zfs_fletcher_avx512.c b/module/zcommon/zfs_fletcher_avx512.c index 7260a9864be1..0d4cff21a506 100644 --- a/module/zcommon/zfs_fletcher_avx512.c +++ b/module/zcommon/zfs_fletcher_avx512.c @@ -157,7 +157,7 @@ STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap); static boolean_t fletcher_4_avx512f_valid(void) { - return (zfs_avx512f_available()); + return (kfpu_allowed() && zfs_avx512f_available()); } const fletcher_4_ops_t fletcher_4_avx512f_ops = { diff --git a/module/zcommon/zfs_fletcher_intel.c b/module/zcommon/zfs_fletcher_intel.c index 6dac047dad0e..7f12efe6d8c5 100644 --- a/module/zcommon/zfs_fletcher_intel.c +++ b/module/zcommon/zfs_fletcher_intel.c @@ -156,7 +156,7 @@ fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size) static boolean_t fletcher_4_avx2_valid(void) { - return (zfs_avx_available() && zfs_avx2_available()); + return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available()); } const fletcher_4_ops_t fletcher_4_avx2_ops = { diff --git a/module/zcommon/zfs_fletcher_sse.c b/module/zcommon/zfs_fletcher_sse.c index a0b42e5f5fa8..e6389d6e5db8 100644 --- a/module/zcommon/zfs_fletcher_sse.c +++ b/module/zcommon/zfs_fletcher_sse.c @@ -157,7 +157,7 @@ fletcher_4_sse2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size) static boolean_t fletcher_4_sse2_valid(void) { - return (zfs_sse2_available()); + return (kfpu_allowed() && zfs_sse2_available()); } const fletcher_4_ops_t fletcher_4_sse2_ops = { @@ -214,7 +214,8 @@ fletcher_4_ssse3_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size) static boolean_t fletcher_4_ssse3_valid(void) { - return (zfs_sse2_available() && zfs_ssse3_available()); + return (kfpu_allowed() && zfs_sse2_available() && + zfs_ssse3_available()); } const fletcher_4_ops_t fletcher_4_ssse3_ops = { diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index 58b23b0e00b0..bf5b77912a1a 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -74,7 +74,7 @@ get_dataset_depth(const char *path) /* * Keep track of nesting until you hit the end of the - * path or found the snapshot/bookmark seperator. + * path or found the snapshot/bookmark separator. 
+ * path or found the snapshot/bookmark separator.
*/ for (int i = 0; loc[i] != '\0' && loc[i] != '@' && @@ -232,6 +232,27 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what) } } + if (*end == '\0' || *end == '/') { + int component_length = end - start; + /* Validate the contents of this component is not '.' */ + if (component_length == 1) { + if (start[0] == '.') { + if (why) + *why = NAME_ERR_SELF_REF; + return (-1); + } + } + + /* Validate the content of this component is not '..' */ + if (component_length == 2) { + if (start[0] == '.' && start[1] == '.') { + if (why) + *why = NAME_ERR_PARENT_REF; + return (-1); + } + } + } + /* Snapshot or bookmark delimiter found */ if (*end == '@' || *end == '#') { /* Multiple delimiters are not allowed */ diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index dab749138a6b..cddf3e88db2f 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -540,7 +540,7 @@ zfs_prop_init(void) ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE"); zprop_register_number(ZFS_PROP_SPECIAL_SMALL_BLOCKS, "special_small_blocks", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "zero or 512 to 128K, power of 2", "SPECIAL_SMALL_BLOCKS"); + "zero or 512 to 1M, power of 2", "SPECIAL_SMALL_BLOCKS"); /* hidden properties */ zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, @@ -853,10 +853,23 @@ zfs_prop_align_right(zfs_prop_t prop) #endif #if defined(_KERNEL) + +#include + +#if defined(HAVE_KERNEL_FPU_INTERNAL) +union fpregs_state **zfs_kfpu_fpregs; +EXPORT_SYMBOL(zfs_kfpu_fpregs); +#endif /* HAVE_KERNEL_FPU_INTERNAL */ + static int __init zcommon_init(void) { + int error = kfpu_init(); + if (error) + return (error); + fletcher_4_init(); + return (0); } @@ -864,6 +877,7 @@ static void __exit zcommon_fini(void) { fletcher_4_fini(); + kfpu_fini(); } module_init(zcommon_init); diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index ac1c42b3f07b..edb4f60e6f20 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -156,7 +156,7 @@ zpool_name_to_prop(const char *propname) /* * Given a pool property ID, returns the corresponding name. - * Assuming the pool propety ID is valid. + * Assuming the pool property ID is valid. */ const char * zpool_prop_to_name(zpool_prop_t prop) diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 58780ac4ed3e..5522620d2f9d 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -72,17 +72,19 @@ * (2) Fragmentation is less of an issue since when we are at the limit of * allocatable space, we won't have to search around for a long free * hole in the VA space for large ARC allocations. Each chunk is mapped in - * individually, so even if we weren't using segkpm (see next point) we + * individually, so even if we are using HIGHMEM (see next point) we * wouldn't need to worry about finding a contiguous address range. * - * (3) Use of segkpm will avoid the need for map / unmap / TLB shootdown costs - * on each ABD access. (If segkpm isn't available then we use all linear - * ABDs to avoid this penalty.) See seg_kpm.c for more details. + * (3) If we are not using HIGHMEM, then all physical memory is always + * mapped into the kernel's address space, so we also avoid the map / + * unmap costs on each ABD access. + * + * If we are not using HIGHMEM, scattered buffers which have only one chunk + * can be treated as linear buffers, because they are contiguous in the + * kernel's virtual address space. See abd_alloc_pages() for details. 
* * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to - * B_FALSE. However, it is not possible to use scattered ABDs if segkpm is not - * available, which is the case on all 32-bit systems and any 64-bit systems - * where kpm_enable is turned off. + * B_FALSE. * * In addition to directly allocating a linear or scattered ABD, it is also * possible to create an ABD by requesting the "sub-ABD" starting at an offset @@ -249,18 +251,6 @@ abd_chunkcnt_for_bytes(size_t size) #define __GFP_RECLAIM __GFP_WAIT #endif -static unsigned long -abd_alloc_chunk(int nid, gfp_t gfp, unsigned int order) -{ - struct page *page; - - page = alloc_pages_node(nid, gfp, order); - if (!page) - return (0); - - return ((unsigned long) page_address(page)); -} - /* * The goal is to minimize fragmentation by preferentially populating ABDs * with higher order compound pages from a single zone. Allocation size is @@ -283,19 +273,18 @@ abd_alloc_pages(abd_t *abd, size_t size) size_t remaining_size; int nid = NUMA_NO_NODE; int alloc_pages = 0; - int order; INIT_LIST_HEAD(&pages); while (alloc_pages < nr_pages) { - unsigned long paddr; unsigned chunk_pages; + int order; order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order); chunk_pages = (1U << order); - paddr = abd_alloc_chunk(nid, order ? gfp_comp : gfp, order); - if (paddr == 0) { + page = alloc_pages_node(nid, order ? gfp_comp : gfp, order); + if (page == NULL) { if (order == 0) { ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry); schedule_timeout_interruptible(1); @@ -305,7 +294,6 @@ abd_alloc_pages(abd_t *abd, size_t size) continue; } - page = virt_to_page(paddr); list_add_tail(&page->lru, &pages); if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid)) @@ -336,7 +324,41 @@ abd_alloc_pages(abd_t *abd, size_t size) list_del(&page->lru); } - if (chunks > 1) { + /* + * These conditions ensure that a possible transformation to a linear + * ABD would be valid. + */ + ASSERT(!PageHighMem(sg_page(table.sgl))); + ASSERT0(ABD_SCATTER(abd).abd_offset); + + if (table.nents == 1) { + /* + * Since there is only one entry, this ABD can be represented + * as a linear buffer. All single-page (4K) ABD's can be + * represented this way. Some multi-page ABD's can also be + * represented this way, if we were able to allocate a single + * "chunk" (higher-order "page" which represents a power-of-2 + * series of physically-contiguous pages). This is often the + * case for 2-page (8K) ABD's. + * + * Representing a single-entry scatter ABD as a linear ABD + * has the performance advantage of avoiding the copy (and + * allocation) in abd_borrow_buf_copy / abd_return_buf_copy. + * A performance increase of around 5% has been observed for + * ARC-cached reads (of small blocks which can take advantage + * of this). + * + * Note that this optimization is only possible because the + * pages are always mapped into the kernel's address space. + * This is not the case for highmem pages, so the + * optimization can not be made there. 
+ */ + abd->abd_flags |= ABD_FLAG_LINEAR; + abd->abd_flags |= ABD_FLAG_LINEAR_PAGE; + abd->abd_u.abd_linear.abd_sgl = table.sgl; + abd->abd_u.abd_linear.abd_buf = + page_address(sg_page(table.sgl)); + } else if (table.nents > 1) { ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); abd->abd_flags |= ABD_FLAG_MULTI_CHUNK; @@ -344,10 +366,10 @@ abd_alloc_pages(abd_t *abd, size_t size) ABDSTAT_BUMP(abdstat_scatter_page_multi_zone); abd->abd_flags |= ABD_FLAG_MULTI_ZONE; } - } - ABD_SCATTER(abd).abd_sgl = table.sgl; - ABD_SCATTER(abd).abd_nents = table.nents; + ABD_SCATTER(abd).abd_sgl = table.sgl; + ABD_SCATTER(abd).abd_nents = table.nents; + } } #else /* @@ -427,10 +449,6 @@ abd_free_pages(abd_t *abd) struct page; -#define kpm_enable 1 -#define abd_alloc_chunk(o) \ - ((struct page *)umem_alloc_aligned(PAGESIZE << (o), 64, KM_SLEEP)) -#define abd_free_chunk(chunk, o) umem_free(chunk, PAGESIZE << (o)) #define zfs_kmap_atomic(chunk, km) ((void *)chunk) #define zfs_kunmap_atomic(addr, km) do { (void)(addr); } while (0) #define local_irq_save(flags) do { (void)(flags); } while (0) @@ -491,7 +509,7 @@ abd_alloc_pages(abd_t *abd, size_t size) sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages); abd_for_each_sg(abd, sg, nr_pages, i) { - struct page *p = abd_alloc_chunk(0); + struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); sg_set_page(sg, p, PAGESIZE, 0); } ABD_SCATTER(abd).abd_nents = nr_pages; @@ -502,12 +520,11 @@ abd_free_pages(abd_t *abd) { int i, n = ABD_SCATTER(abd).abd_nents; struct scatterlist *sg; - int j; abd_for_each_sg(abd, sg, n, i) { - for (j = 0; j < sg->length; j += PAGESIZE) { - struct page *p = nth_page(sg_page(sg), j>>PAGE_SHIFT); - abd_free_chunk(p, 0); + for (int j = 0; j < sg->length; j += PAGESIZE) { + struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT); + umem_free(p, PAGESIZE); } } @@ -560,7 +577,7 @@ abd_verify(abd_t *abd) ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | - ABD_FLAG_MULTI_CHUNK)); + ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE)); IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); if (abd_is_linear(abd)) { @@ -613,6 +630,7 @@ abd_alloc(size_t size, boolean_t is_metadata) abd_t *abd = abd_alloc_struct(); abd->abd_flags = ABD_FLAG_OWNER; + abd->abd_u.abd_scatter.abd_offset = 0; abd_alloc_pages(abd, size); if (is_metadata) { @@ -622,8 +640,6 @@ abd_alloc(size_t size, boolean_t is_metadata) abd->abd_parent = NULL; zfs_refcount_create(&abd->abd_children); - abd->abd_u.abd_scatter.abd_offset = 0; - ABDSTAT_BUMP(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, size); ABDSTAT_INCR(abdstat_scatter_chunk_waste, @@ -681,6 +697,17 @@ abd_alloc_linear(size_t size, boolean_t is_metadata) static void abd_free_linear(abd_t *abd) { + if (abd_is_linear_page(abd)) { + /* Transform it back into a scatter ABD for freeing */ + struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; + abd->abd_flags &= ~ABD_FLAG_LINEAR; + abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; + ABD_SCATTER(abd).abd_nents = 1; + ABD_SCATTER(abd).abd_offset = 0; + ABD_SCATTER(abd).abd_sgl = sg; + abd_free_scatter(abd); + return; + } if (abd->abd_flags & ABD_FLAG_META) { zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); } else { @@ -718,7 +745,8 @@ abd_t * abd_alloc_sametype(abd_t *sabd, size_t size) { boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; - if 
(abd_is_linear(sabd)) { + if (abd_is_linear(sabd) && + !abd_is_linear_page(sabd)) { return (abd_alloc_linear(size, is_metadata)); } else { return (abd_alloc(size, is_metadata)); @@ -966,6 +994,16 @@ abd_release_ownership_of_buf(abd_t *abd) { ASSERT(abd_is_linear(abd)); ASSERT(abd->abd_flags & ABD_FLAG_OWNER); + + /* + * abd_free() needs to handle LINEAR_PAGE ABD's specially. + * Since that flag does not survive the + * abd_release_ownership_of_buf() -> abd_get_from_buf() -> + * abd_take_ownership_of_buf() sequence, we don't allow releasing + * these "linear but not zio_[data_]buf_alloc()'ed" ABD's. + */ + ASSERT(!abd_is_linear_page(abd)); + abd_verify(abd); abd->abd_flags &= ~ABD_FLAG_OWNER; @@ -1376,8 +1414,10 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, switch (parity) { case 3: len = MIN(caiters[2].iter_mapsize, len); + /* falls through */ case 2: len = MIN(caiters[1].iter_mapsize, len); + /* falls through */ case 1: len = MIN(caiters[0].iter_mapsize, len); } @@ -1467,9 +1507,11 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, case 3: len = MIN(xiters[2].iter_mapsize, len); len = MIN(citers[2].iter_mapsize, len); + /* falls through */ case 2: len = MIN(xiters[1].iter_mapsize, len); len = MIN(citers[1].iter_mapsize, len); + /* falls through */ case 1: len = MIN(xiters[0].iter_mapsize, len); len = MIN(citers[0].iter_mapsize, len); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 9b500352a4c0..a16689dc6b07 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Joyent, Inc. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ @@ -62,7 +62,7 @@ * elements of the cache are therefore exactly the same size. So * when adjusting the cache size following a cache miss, its simply * a matter of choosing a single page to evict. In our model, we - * have variable sized cache blocks (rangeing from 512 bytes to + * have variable sized cache blocks (ranging from 512 bytes to * 128K bytes). We therefore choose a set of blocks to evict to make * space for a cache miss that approximates as closely as possible * the space used by the new block. @@ -262,7 +262,7 @@ * The L1ARC has a slightly different system for storing encrypted data. * Raw (encrypted + possibly compressed) data has a few subtle differences from * data that is just compressed. The biggest difference is that it is not - * possible to decrypt encrypted data (or visa versa) if the keys aren't loaded. + * possible to decrypt encrypted data (or vice-versa) if the keys aren't loaded. * The other difference is that encryption cannot be treated as a suggestion. * If a caller would prefer compressed data, but they actually wind up with * uncompressed data the worst thing that could happen is there might be a @@ -296,6 +296,7 @@ #include #include #include +#include #endif #include #include @@ -1872,7 +1873,8 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) * There were no decompressed bufs, so there should not be a * checksum on the hdr either. 
 */
-	EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL);
+	if (zfs_flags & ZFS_DEBUG_MODIFY)
+		EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL);
 
 	return (copied);
 }
@@ -2151,7 +2153,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
 	}
 
 	/*
-	 * Adjust encrypted and authenticated headers to accomodate
+	 * Adjust encrypted and authenticated headers to accommodate
 	 * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
 	 * allowed to fail decryption due to keys not being loaded
 	 * without being marked as an IO error.
@@ -2220,7 +2222,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
 		if (arc_buf_is_shared(buf)) {
 			ASSERT(ARC_BUF_COMPRESSED(buf));
-			/* We need to give the buf it's own b_data */
+			/* We need to give the buf its own b_data */
 			buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
 			buf->b_data = arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf);
@@ -2253,7 +2255,6 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
 		 */
 		if (arc_buf_try_copy_decompressed_data(buf)) {
 			/* Skip byteswapping and checksumming (already done) */
-			ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL);
 			return (0);
 		} else {
 			error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -2837,7 +2838,7 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 	 * sufficient to make this guarantee, however it's possible
 	 * (specifically in the rare L2ARC write race mentioned in
 	 * arc_buf_alloc_impl()) there will be an existing uncompressed buf that
-	 * is sharable, but wasn't at the time of its allocation. Rather than
+	 * is shareable, but wasn't at the time of its allocation. Rather than
 	 * allow a new shared uncompressed buf to be created and then shuffle
 	 * the list around to make it the last element, this simply disallows
 	 * sharing if the new buf isn't the first to be added.
@@ -2896,7 +2897,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
 	/*
 	 * Only honor requests for compressed bufs if the hdr is actually
-	 * compressed. This must be overriden if the buffer is encrypted since
+	 * compressed. This must be overridden if the buffer is encrypted since
 	 * encrypted buffers cannot be decompressed.
 	 */
 	if (encrypted) {
@@ -2917,7 +2918,8 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
 	/*
 	 * If the hdr's data can be shared then we share the data buffer and
 	 * set the appropriate bit in the hdr's b_flags to indicate the hdr is
-	 * allocate a new buffer to store the buf's data.
+	 * sharing its b_pabd with the arc_buf_t. Otherwise, we allocate a new
+	 * buffer to store the buf's data.
 	 *
 	 * There are two additional restrictions here because we're sharing
 	 * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be
@@ -2925,10 +2927,17 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
 	 * an arc_write() then the hdr's data buffer will be released when the
 	 * write completes, even though the L2ARC write might still be using it.
 	 * Second, the hdr's ABD must be linear so that the buf's user doesn't
-	 * need to be ABD-aware.
-	 */
-	boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) &&
-	    hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd);
+	 * need to be ABD-aware. It must be allocated via
+	 * zio_[data_]buf_alloc(), not as a page, because we need to be able
+	 * to abd_release_ownership_of_buf(), which isn't allowed on "linear
+	 * page" buffers because the ABD code needs to handle freeing them
+	 * specially.
+ */ + boolean_t can_share = arc_can_share(hdr, buf) && + !HDR_L2_WRITING(hdr) && + hdr->b_l1hdr.b_pabd != NULL && + abd_is_linear(hdr->b_l1hdr.b_pabd) && + !abd_is_linear_page(hdr->b_l1hdr.b_pabd); /* Set up b_data and sharing */ if (can_share) { @@ -3192,7 +3201,7 @@ arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf) } /* - * Free up buf->b_data and pull the arc_buf_t off of the the arc_buf_hdr_t's + * Free up buf->b_data and pull the arc_buf_t off of the arc_buf_hdr_t's * list and free it. */ static void @@ -3651,7 +3660,7 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) /* * This function is used by the send / receive code to convert a newly * allocated arc_buf_t to one that is suitable for a raw encrypted write. It - * is also used to allow the root objset block to be uupdated without altering + * is also used to allow the root objset block to be updated without altering * its embedded MACs. Both block types will always be uncompressed so we do not * have to worry about compression type or psize. */ @@ -3731,7 +3740,6 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, * disk, it's easiest if we just set up sharing between the * buf and the hdr. */ - ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); arc_hdr_free_abd(hdr, B_FALSE); arc_share_buf(hdr, buf); } @@ -4801,8 +4809,6 @@ arc_reduce_target_size(int64_t to_free) if (c > to_free && c - to_free > arc_c_min) { arc_c = c - to_free; atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); - if (asize < arc_c) - arc_c = MAX(asize, arc_c_min); if (arc_p > arc_c) arc_p = (arc_c >> 1); ASSERT(arc_c >= arc_c_min); @@ -5081,6 +5087,9 @@ arc_kmem_reap_soon(void) static boolean_t arc_adjust_cb_check(void *arg, zthr_t *zthr) { + if (!arc_initialized) + return (B_FALSE); + /* * This is necessary so that any changes which may have been made to * many of the zfs_arc_* module parameters will be propagated to @@ -5168,6 +5177,9 @@ arc_adjust_cb(void *arg, zthr_t *zthr) static boolean_t arc_reap_cb_check(void *arg, zthr_t *zthr) { + if (!arc_initialized) + return (B_FALSE); + int64_t free_memory = arc_available_memory(); /* @@ -5480,7 +5492,7 @@ static boolean_t arc_is_overflowing(void) { /* Always allow at least one block of overflow */ - uint64_t overflow = MAX(SPA_MAXBLOCKSIZE, + int64_t overflow = MAX(SPA_MAXBLOCKSIZE, arc_c >> zfs_arc_overflow_shift); /* @@ -5492,7 +5504,7 @@ arc_is_overflowing(void) * in the ARC. In practice, that's in the tens of MB, which is low * enough to be safe. */ - return (aggsum_lower_bound(&arc_size) >= arc_c + overflow); + return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow); } static abd_t * @@ -5608,7 +5620,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) * If we are growing the cache, and we are adding anonymous * data, and we have outgrown arc_p, update arc_p */ - if (aggsum_compare(&arc_size, arc_c) < 0 && + if (aggsum_upper_bound(&arc_size) < arc_c && hdr->b_l1hdr.b_state == arc_anon && (zfs_refcount_count(&arc_anon->arcs_size) + zfs_refcount_count(&arc_mru->arcs_size) > arc_p)) @@ -6177,7 +6189,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, /* * Determine if we have an L1 cache hit or a cache miss. For simplicity - * we maintain encrypted data seperately from compressed / uncompressed + * we maintain encrypted data separately from compressed / uncompressed * data. 
If the user is requesting raw encrypted data and we don't have * that in the header we will read from disk to guarantee that we can * get it even if the encryption keys aren't loaded. @@ -7543,8 +7555,10 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) /* * Called during module initialization and periodically thereafter to - * apply reasonable changes to the exposed performance tunings. Non-zero - * zfs_* values which differ from the currently set values will be applied. + * apply reasonable changes to the exposed performance tunings. Can also be + * called explicitly by param_set_arc_*() functions when ARC tunables are + * updated manually. Non-zero zfs_* values which differ from the currently set + * values will be applied. */ static void arc_tuning_update(void) @@ -7926,11 +7940,9 @@ arc_fini(void) list_destroy(&arc_prune_list); mutex_destroy(&arc_prune_mtx); - (void) zthr_cancel(arc_adjust_zthr); - zthr_destroy(arc_adjust_zthr); + (void) zthr_cancel(arc_adjust_zthr); (void) zthr_cancel(arc_reap_zthr); - zthr_destroy(arc_reap_zthr); mutex_destroy(&arc_adjust_lock); cv_destroy(&arc_adjust_waiters_cv); @@ -7943,6 +7955,14 @@ arc_fini(void) buf_fini(); arc_state_fini(); + /* + * We destroy the zthrs after all the ARC state has been + * torn down to avoid the case of them receiving any + * wakeup() signals after they are destroyed. + */ + zthr_destroy(arc_adjust_zthr); + zthr_destroy(arc_reap_zthr); + ASSERT0(arc_loaned_bytes); } @@ -8544,7 +8564,6 @@ l2arc_read_done(zio_t *zio) zio->io_private = hdr; arc_read_done(zio); } else { - mutex_exit(hash_lock); /* * Buffer didn't survive caching. Increment stats and * reissue to the original storage device. @@ -8569,10 +8588,24 @@ l2arc_read_done(zio_t *zio) ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); - zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, + zio = zio_read(pio, zio->io_spa, zio->io_bp, abd, zio->io_size, arc_read_done, hdr, zio->io_priority, cb->l2rcb_flags, - &cb->l2rcb_zb)); + &cb->l2rcb_zb); + + /* + * Original ZIO will be freed, so we need to update + * ARC header with the new ZIO pointer to be used + * by zio_change_priority() in arc_read(). + */ + for (struct arc_callback *acb = hdr->b_l1hdr.b_acb; + acb != NULL; acb = acb->acb_next) + acb->acb_zio_head = zio; + + mutex_exit(hash_lock); + zio_nowait(zio); + } else { + mutex_exit(hash_lock); } } @@ -8760,7 +8793,7 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, /* * If this data simply needs its own buffer, we simply allocate it - * and copy the data. This may be done to elimiate a depedency on a + * and copy the data. This may be done to eliminate a dependency on a * shared buffer or to reallocate the buffer to match asize. 
*/ if (HDR_HAS_RABD(hdr) && asize != psize) { @@ -9359,6 +9392,35 @@ l2arc_stop(void) } #if defined(_KERNEL) +static int +param_set_arc_long(const char *buf, zfs_kernel_param_t *kp) +{ + int error; + + error = param_set_long(buf, kp); + if (error < 0) + return (SET_ERROR(error)); + + arc_tuning_update(); + + return (0); +} + +static int +param_set_arc_int(const char *buf, zfs_kernel_param_t *kp) +{ + int error; + + error = param_set_int(buf, kp); + if (error < 0) + return (SET_ERROR(error)); + + arc_tuning_update(); + + return (0); +} + + EXPORT_SYMBOL(arc_buf_size); EXPORT_SYMBOL(arc_write); EXPORT_SYMBOL(arc_read); @@ -9368,20 +9430,25 @@ EXPORT_SYMBOL(arc_add_prune_callback); EXPORT_SYMBOL(arc_remove_prune_callback); /* BEGIN CSTYLED */ -module_param(zfs_arc_min, ulong, 0644); +module_param_call(zfs_arc_min, param_set_arc_long, param_get_long, + &zfs_arc_min, 0644); MODULE_PARM_DESC(zfs_arc_min, "Min arc size"); -module_param(zfs_arc_max, ulong, 0644); +module_param_call(zfs_arc_max, param_set_arc_long, param_get_long, + &zfs_arc_max, 0644); MODULE_PARM_DESC(zfs_arc_max, "Max arc size"); -module_param(zfs_arc_meta_limit, ulong, 0644); +module_param_call(zfs_arc_meta_limit, param_set_arc_long, param_get_long, + &zfs_arc_meta_limit, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); -module_param(zfs_arc_meta_limit_percent, ulong, 0644); +module_param_call(zfs_arc_meta_limit_percent, param_set_arc_long, + param_get_long, &zfs_arc_meta_limit_percent, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit_percent, "Percent of arc size for arc meta limit"); -module_param(zfs_arc_meta_min, ulong, 0644); +module_param_call(zfs_arc_meta_min, param_set_arc_long, param_get_long, + &zfs_arc_meta_min, 0644); MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata"); module_param(zfs_arc_meta_prune, int, 0644); @@ -9394,20 +9461,23 @@ MODULE_PARM_DESC(zfs_arc_meta_adjust_restarts, module_param(zfs_arc_meta_strategy, int, 0644); MODULE_PARM_DESC(zfs_arc_meta_strategy, "Meta reclaim strategy"); -module_param(zfs_arc_grow_retry, int, 0644); +module_param_call(zfs_arc_grow_retry, param_set_arc_int, param_get_int, + &zfs_arc_grow_retry, 0644); MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size"); module_param(zfs_arc_p_dampener_disable, int, 0644); MODULE_PARM_DESC(zfs_arc_p_dampener_disable, "disable arc_p adapt dampener"); -module_param(zfs_arc_shrink_shift, int, 0644); +module_param_call(zfs_arc_shrink_shift, param_set_arc_int, param_get_int, + &zfs_arc_shrink_shift, 0644); MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)"); module_param(zfs_arc_pc_percent, uint, 0644); MODULE_PARM_DESC(zfs_arc_pc_percent, "Percent of pagecache to reclaim arc to"); -module_param(zfs_arc_p_min_shift, int, 0644); +module_param_call(zfs_arc_p_min_shift, param_set_arc_int, param_get_int, + &zfs_arc_p_min_shift, 0644); MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p"); module_param(zfs_arc_average_blocksize, int, 0444); @@ -9416,7 +9486,8 @@ MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size"); module_param(zfs_compressed_arc_enabled, int, 0644); MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers"); -module_param(zfs_arc_min_prefetch_ms, int, 0644); +module_param_call(zfs_arc_min_prefetch_ms, param_set_arc_int, param_get_int, + &zfs_arc_min_prefetch_ms, 0644); MODULE_PARM_DESC(zfs_arc_min_prefetch_ms, "Min life of prefetch block in ms"); module_param(zfs_arc_min_prescient_prefetch_ms, int, 0644); @@ 
-9450,14 +9521,17 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup"); module_param(l2arc_norw, int, 0644); MODULE_PARM_DESC(l2arc_norw, "No reads during writes"); -module_param(zfs_arc_lotsfree_percent, int, 0644); +module_param_call(zfs_arc_lotsfree_percent, param_set_arc_int, param_get_int, + &zfs_arc_lotsfree_percent, 0644); MODULE_PARM_DESC(zfs_arc_lotsfree_percent, "System free memory I/O throttle in bytes"); -module_param(zfs_arc_sys_free, ulong, 0644); +module_param_call(zfs_arc_sys_free, param_set_arc_long, param_get_long, + &zfs_arc_sys_free, 0644); MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes"); -module_param(zfs_arc_dnode_limit, ulong, 0644); +module_param_call(zfs_arc_dnode_limit, param_set_arc_long, param_get_long, + &zfs_arc_dnode_limit, 0644); MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc"); module_param(zfs_arc_dnode_limit_percent, ulong, 0644); diff --git a/module/zfs/bqueue.c b/module/zfs/bqueue.c index f30253d24bfb..3fc7fcaaada7 100644 --- a/module/zfs/bqueue.c +++ b/module/zfs/bqueue.c @@ -73,7 +73,7 @@ bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size) mutex_enter(&q->bq_lock); obj2node(q, data)->bqn_size = item_size; while (q->bq_size + item_size > q->bq_maxsize) { - cv_wait(&q->bq_add_cv, &q->bq_lock); + cv_wait_sig(&q->bq_add_cv, &q->bq_lock); } q->bq_size += item_size; list_insert_tail(&q->bq_list, data); @@ -91,7 +91,7 @@ bqueue_dequeue(bqueue_t *q) uint64_t item_size; mutex_enter(&q->bq_lock); while (q->bq_size == 0) { - cv_wait(&q->bq_pop_cv, &q->bq_lock); + cv_wait_sig(&q->bq_pop_cv, &q->bq_lock); } ret = list_remove_head(&q->bq_list); ASSERT3P(ret, !=, NULL); diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c index 522825c42ccf..e46a0926d557 100644 --- a/module/zfs/dataset_kstats.c +++ b/module/zfs/dataset_kstats.c @@ -135,6 +135,7 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) kstat->ks_data = dk_kstats; kstat->ks_update = dataset_kstats_update; kstat->ks_private = dk; + kstat->ks_data_size += ZFS_MAX_DATASET_NAME_LEN; kstat_install(kstat); dk->dk_kstats = kstat; diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 07e616f6f0de..0542ba7aeb2c 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1826,9 +1826,11 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) db->db.db_size = size; if (db->db_level == 0) { - ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); db->db_last_dirty->dt.dl.dr_data = buf; } + ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); + ASSERT3U(db->db_last_dirty->dr_accounted, ==, osize); + db->db_last_dirty->dr_accounted = size; mutex_exit(&db->db_mtx); dmu_objset_willuse_space(dn->dn_objset, size - osize, tx); @@ -2041,7 +2043,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); } - if (db->db_blkid != DMU_BONUS_BLKID && os->os_dsl_dataset != NULL) + if (db->db_blkid != DMU_BONUS_BLKID) dr->dr_accounted = db->db.db_size; dr->dr_dbuf = db; dr->dr_txg = tx->tx_txg; @@ -2273,7 +2275,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) ASSERT(!zfs_refcount_is_zero(&db->db_holds)); /* - * Quick check for dirtyness. For already dirty blocks, this + * Quick check for dirtiness. For already dirty blocks, this * reduces runtime of this function by >90%, and overall performance * by 50% for some workloads (e.g. file deletion with indirect blocks * cached). 
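The param_set_arc_long()/param_set_arc_int() conversions above all follow one pattern: the stock param_set_*() helper stores the new value, and arc_tuning_update() then pushes it into the running ARC, so a write to /sys/module/zfs/parameters takes effect immediately instead of waiting for the next periodic tuning pass. A minimal sketch of that pattern, using a hypothetical my_tunable rather than any real parameter (zfs_kernel_param_t is the compatibility typedef this patch already uses in spl-condvar.c):

	static int my_tunable = 0;

	static int
	param_set_my_tunable(const char *buf, zfs_kernel_param_t *kp)
	{
		/* Store the value with the stock helper first... */
		int error = param_set_int(buf, kp);
		if (error < 0)
			return (error);

		/* ...then apply the new value to the live module state. */
		arc_tuning_update();
		return (0);
	}

	module_param_call(my_tunable, param_set_my_tunable, param_get_int,
	    &my_tunable, 0644);
	MODULE_PARM_DESC(my_tunable, "Hypothetical tunable with a custom setter");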
@@ -2591,7 +2593,8 @@ dbuf_destroy(dmu_buf_impl_t *db) if (db->db_blkid != DMU_BONUS_BLKID) { boolean_t needlock = !MUTEX_HELD(&dn->dn_dbufs_mtx); if (needlock) - mutex_enter(&dn->dn_dbufs_mtx); + mutex_enter_nested(&dn->dn_dbufs_mtx, + NESTED_SINGLE); avl_remove(&dn->dn_dbufs, db); atomic_dec_32(&dn->dn_dbufs_count); membar_producer(); @@ -2788,7 +2791,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, * Hold the dn_dbufs_mtx while we get the new dbuf * in the hash table *and* added to the dbufs list. * This prevents a possible deadlock with someone - * trying to look up this dbuf before its added to the + * trying to look up this dbuf before it's added to the * dn_dbufs list. */ mutex_enter(&dn->dn_dbufs_mtx); @@ -3182,7 +3185,7 @@ dbuf_hold_impl_arg(struct dbuf_hold_arg *dh) ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf)); /* - * If this buffer is currently syncing out, and we are are + * If this buffer is currently syncing out, and we are * still referencing it from db_data, we need to make a copy * of it in case we decide we want to dirty it again in this txg. */ @@ -3663,7 +3666,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) /* * This buffer was allocated at a time when there was * no available blkptrs from the dnode, or it was - * inappropriate to hook it in (i.e., nlevels mis-match). + * inappropriate to hook it in (i.e., nlevels mismatch). */ ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); ASSERT(db->db_parent == NULL); @@ -3784,6 +3787,46 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) zio_nowait(zio); } +#ifdef ZFS_DEBUG +/* + * Verify that the size of the data in our bonus buffer does not exceed + * its recorded size. + * + * The purpose of this verification is to catch any cases in development + * where the size of a phys structure (i.e. space_map_phys_t) grows and, + * due to incorrect feature management, older pools expect to read more + * data even though they didn't actually write it to begin with. + * + * For example, this would catch an error in the feature logic where we + * open an older pool and we expect to write the space map histogram of + * a space map with size SPACE_MAP_SIZE_V0. + */ +static void +dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr) +{ + dnode_t *dn = DB_DNODE(dr->dr_dbuf); + + /* + * Encrypted bonus buffers can have data past their bonuslen. + * Skip the verification of these blocks. + */ + if (DMU_OT_IS_ENCRYPTED(dn->dn_bonustype)) + return; + + uint16_t bonuslen = dn->dn_phys->dn_bonuslen; + uint16_t maxbonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots); + ASSERT3U(bonuslen, <=, maxbonuslen); + + arc_buf_t *datap = dr->dt.dl.dr_data; + char *datap_end = ((char *)datap) + bonuslen; + char *datap_max = ((char *)datap) + maxbonuslen; + + /* ensure that everything is zero after our data */ + for (; datap_end < datap_max; datap_end++) + ASSERT(*datap_end == 0); +} +#endif + /* * dbuf_sync_leaf() is called recursively from dbuf_sync_list() so it is * critical that we not allow the compiler to inline this function into @@ -3860,6 +3903,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) DN_MAX_BONUS_LEN(dn->dn_phys)); DB_DNODE_EXIT(db); +#ifdef ZFS_DEBUG + dbuf_sync_leaf_verify_bonus_dnode(dr); +#endif + if (*datap != db->db.db_data) { int slots = DB_DNODE(db)->dn_num_slots; int bonuslen = DN_SLOTS_TO_BONUSLEN(slots); @@ -4165,8 +4212,7 @@ dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg) /* * The callback will be called io_phys_children times.
Retire one * portion of our dirty space each time we are called. Any rounding - * error will be cleaned up by dsl_pool_sync()'s call to - * dsl_pool_undirty_space(). + * error will be cleaned up by dbuf_write_done(). */ delta = dr->dr_accounted / zio->io_phys_children; dsl_pool_undirty_space(dp, delta, zio->io_txg); @@ -4249,13 +4295,36 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); cv_broadcast(&db->db_changed); ASSERT(db->db_dirtycnt > 0); db->db_dirtycnt -= 1; db->db_data_pending = NULL; dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE); + + /* + * If we didn't do a physical write in this ZIO and we + * still ended up here, it means that the space of the + * dbuf that we just released (and undirtied) above hasn't + * been marked as undirtied in the pool's accounting. + * + * Thus, we undirty that space in the pool's view of the + * world here. For physical writes this type of update + * happens in dbuf_write_physdone(). + * + * If we did a physical write, cleanup any rounding errors + * that came up due to writing multiple copies of a block + * on disk [see dbuf_write_physdone()]. + */ + if (zio->io_phys_children == 0) { + dsl_pool_undirty_space(dmu_objset_pool(os), + dr->dr_accounted, zio->io_txg); + } else { + dsl_pool_undirty_space(dmu_objset_pool(os), + dr->dr_accounted % zio->io_phys_children, zio->io_txg); + } + + kmem_free(dr, sizeof (dbuf_dirty_record_t)); } static void diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index 77c0784cca0b..3489d31d9c9e 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018 by Delphix. All rights reserved. */ #include @@ -117,7 +118,18 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) zap_attribute_t za; int error; - zap_cursor_init_serialized(&zc, os, object, *walk); + if (*walk == 0) { + /* + * We don't want to prefetch the entire ZAP object, because + * it can be enormous. Also the primary use of DDT iteration + * is for scrubbing, in which case we will be issuing many + * scrub I/Os for each ZAP block that we read in, so + * reading the ZAP is unlikely to be the bottleneck. + */ + zap_cursor_init_noprefetch(&zc, os, object); + } else { + zap_cursor_init_serialized(&zc, os, object, *walk); + } if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { uchar_t cbuf[sizeof (dde->dde_phys) + 1]; uint64_t csize = za.za_num_integers; diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 1697a632078e..f972545d30b5 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -81,6 +81,13 @@ int zfs_dmu_offset_next_sync = 0; */ int zfs_object_remap_one_indirect_delay_ms = 0; +/* + * Limit the amount we can prefetch with one call to this amount. This + * helps to limit the amount of memory that can be used by prefetching. + * Larger objects should be prefetched a bit at a time. + */ +int dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE; + const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" }, @@ -639,11 +646,11 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) /* * Issue prefetch i/os for the given blocks. 
If level is greater than 0, the - * indirect blocks prefeteched will be those that point to the blocks containing + * indirect blocks prefetched will be those that point to the blocks containing * the data starting at offset, and continuing to offset + len. * * Note that if the indirect blocks above the blocks being prefetched are not - * in cache, they will be asychronously read in. + * in cache, they will be asynchronously read in. */ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, @@ -667,6 +674,11 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, return; } + /* + * See comment before the definition of dmu_prefetch_max. + */ + len = MIN(len, dmu_prefetch_max); + /* * XXX - Note, if the dnode for the requested object is not * already cached, we will do a *synchronous* read in the @@ -719,8 +731,8 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks) uint64_t blks; uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1); /* bytes of data covered by a level-1 indirect block */ - uint64_t iblkrange = - dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); + uint64_t iblkrange = (uint64_t)dn->dn_datablksz * + EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); ASSERT3U(minimum, <=, *start); @@ -1086,6 +1098,9 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_buf_rele_array(dbp, numbufs, FTAG); } +/* + * Note: Lustre is an external consumer of this interface. + */ void dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx) @@ -2286,7 +2301,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) * Determine dedup setting. If we are in dmu_sync(), * we won't actually dedup now because that's all * done in syncing context; but we do want to use the - * dedup checkum. If the checksum is not strong + * dedup checksum. If the checksum is not strong * enough to ensure unique signatures, force * dedup_verify. */ @@ -2373,39 +2388,14 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) return (err); /* - * Check if there are dirty data blocks or frees which have not been - * synced. Dirty spill and bonus blocks which are external to the - * object can ignored when reporting holes. + * Check if dnode is dirty */ - mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { if (multilist_link_active(&dn->dn_dirty_link[i])) { - - if (dn->dn_free_ranges[i] != NULL) { - clean = B_FALSE; - break; - } - - list_t *list = &dn->dn_dirty_records[i]; - dbuf_dirty_record_t *dr; - - for (dr = list_head(list); dr != NULL; - dr = list_next(list, dr)) { - dmu_buf_impl_t *db = dr->dr_dbuf; - - if (db->db_blkid == DMU_SPILL_BLKID || - db->db_blkid == DMU_BONUS_BLKID) - continue; - - clean = B_FALSE; - break; - } - } - - if (clean == B_FALSE) + clean = B_FALSE; break; + } } - mutex_exit(&dn->dn_mtx); /* * If compatibility option is on, sync any current changes before @@ -2654,6 +2644,10 @@ module_param(zfs_dmu_offset_next_sync, int, 0644); MODULE_PARM_DESC(zfs_dmu_offset_next_sync, "Enable forcing txg sync to find holes"); +module_param(dmu_prefetch_max, int, 0644); +MODULE_PARM_DESC(dmu_prefetch_max, + "Limit one prefetch call to this size"); + /* END CSTYLED */ #endif diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index 76c32b126423..6a7cd844c44a 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. 
All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. */ #include @@ -130,7 +131,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, dnode_phys_t *blk; arc_buf_t *abuf; arc_flags_t aflags = ARC_FLAG_WAIT; - int blksz = BP_GET_LSIZE(bp); + int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; int zio_flags = ZIO_FLAG_CANFAIL; int i; @@ -142,7 +143,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (SET_ERROR(EIO)); blk = abuf->b_data; - for (i = 0; i < blksz >> DNODE_SHIFT; i++) { + for (i = 0; i < epb; i += blk[i].dn_extra_slots + 1) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = report_dnode(da, dnobj, blk+i); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index f95915b9e253..c78019d05ac5 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1027,7 +1027,7 @@ dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* * We don't want to have to increase the meta-dnode's nlevels - * later, because then we could do it in quescing context while + * later, because then we could do it in quiescing context while * we are also accessing it in open context. * * This precaution is not necessary for the MOS (ds == NULL), @@ -1348,13 +1348,6 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EINVAL)); } - error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir); - if (error != 0) { - dsl_dataset_rele(origin, FTAG); - dsl_dir_rele(pdd, FTAG); - return (error); - } - dsl_dataset_rele(origin, FTAG); dsl_dir_rele(pdd, FTAG); @@ -1699,6 +1692,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) zio_t *zio; list_t *list; dbuf_dirty_record_t *dr; + int num_sublists; + multilist_t *ml; blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); *blkptr_copy = *os->os_rootbp; @@ -1787,10 +1782,13 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) } } - for (int i = 0; - i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) { + ml = os->os_dirty_dnodes[txgoff]; + num_sublists = multilist_get_num_sublists(ml); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(ml, i)) + continue; sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP); - sda->sda_list = os->os_dirty_dnodes[txgoff]; + sda->sda_list = ml; sda->sda_sublist_idx = i; sda->sda_tx = tx; (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq, @@ -2093,6 +2091,8 @@ userquota_updates_task(void *arg) void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) { + int num_sublists; + if (!dmu_objset_userused_enabled(os)) return; @@ -2125,8 +2125,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); } - for (int i = 0; - i < multilist_get_num_sublists(os->os_synced_dnodes); i++) { + num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i)) + continue; userquota_updates_arg_t *uua = kmem_alloc(sizeof (*uua), KM_SLEEP); uua->uua_os = os; @@ -2740,7 +2742,7 @@ dmu_objset_find_dp_cb(void *arg) /* * We need to get a pool_config_lock here, as there are several - * asssert(pool_config_held) down the stack. Getting a lock via + * assert(pool_config_held) down the stack. Getting a lock via * dsl_pool_config_enter is risky, as it might be stalled by a * pending writer. 
This would deadlock, as the write lock can * only be granted when our parent thread gives up the lock. @@ -3000,9 +3002,17 @@ dmu_fsname(const char *snapname, char *buf) } /* - * Call when we think we're going to write/free space in open context to track - * the amount of dirty data in the open txg, which is also the amount - * of memory that can not be evicted until this txg syncs. + * Call when we think we're going to write/free space in open context + * to track the amount of dirty data in the open txg, which is also the + * amount of memory that can not be evicted until this txg syncs. + * + * Note that there are two conditions where this can be called from + * syncing context: + * + * [1] When we just created the dataset, in which case we go on with + * updating any accounting of dirty data as usual. + * [2] When we are dirtying MOS data, in which case we only update the + * pool's accounting of dirty data. */ void dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx) @@ -3012,8 +3022,9 @@ dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx) if (ds != NULL) { dsl_dir_willuse_space(ds->ds_dir, aspace, tx); - dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx); } + + dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx); } #if defined(_KERNEL) diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 976b1bd46420..2324e8e87ba2 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -86,21 +86,25 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0; boolean_t embed = (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) != 0; - /* temporary clone name must not exist */ + /* Temporary clone name must not exist. */ error = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EBUSY : error); - /* new snapshot name must not exist */ + /* Resume state must not be set. */ + if (dsl_dataset_has_resume_receive_state(ds)) + return (SET_ERROR(EBUSY)); + + /* New snapshot name must not exist. */ error = zap_lookup(dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EEXIST : error); - /* must not have children if receiving a ZVOL */ + /* Must not have children if receiving a ZVOL. */ error = zap_count(dp->dp_meta_objset, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &children); if (error != 0) @@ -158,9 +162,16 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, } else { /* * If we are not forcing, there must be no - * changes since fromsnap. + * changes since fromsnap. Raw sends have an + * additional constraint that requires that + * no "noop" snapshots exist between fromsnap + * and tosnap for the IVset checking code to + * work properly. 
*/ - if (dsl_dataset_modified_since_snap(ds, snap)) { + if (dsl_dataset_modified_since_snap(ds, snap) || + (raw && + dsl_dataset_phys(ds)->ds_prev_snap_obj != + snap->ds_object)) { dsl_dataset_rele(snap, FTAG); return (SET_ERROR(ETXTBSY)); } @@ -320,7 +331,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); - error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds); + error = dsl_dataset_hold(dp, buf, FTAG, &ds); if (error != 0) return (error); @@ -338,13 +349,13 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dmu_objset_create_crypt_check(ds->ds_dir, drba->drba_dcp, &will_encrypt); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (will_encrypt && (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } } @@ -357,25 +368,25 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } /* can't recv below anything but filesystems (eg. no ZVOLs) */ error = dmu_objset_from_ds(ds, &os); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (dmu_objset_type(os) != DMU_OST_ZFS) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); } @@ -385,31 +396,31 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_dataset_hold_flags(dp, drba->drba_origin, dsflags, FTAG, &origin); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (!origin->ds_is_snapshot) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } if (dsl_dataset_phys(origin)->ds_guid != fromguid && fromguid != 0) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); } if (origin->ds_dir->dd_crypto_obj != 0 && (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } dsl_dataset_rele_flags(origin, dsflags, FTAG); } - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); error = 0; } return (error); diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index cbadcc86fc61..d6a42f84c751 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -925,6 +925,25 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) txh = list_next(&tx->tx_holds, txh)) { dnode_t *dn = txh->txh_dnode; if (dn != NULL) { + /* + * This thread can't hold the dn_struct_rwlock + * while assigning the tx, because this can lead to + * deadlock. Specifically, if this dnode is already + * assigned to an earlier txg, this thread may need + * to wait for that txg to sync (the ERESTART case + * below). 
The other thread that has assigned this + * dnode to an earlier txg prevents this txg from + * syncing until its tx can complete (calling + * dmu_tx_commit()), but it may need to acquire the + * dn_struct_rwlock to do so (e.g. via + * dmu_buf_hold*()). + * + * Note that this thread can't hold the lock for + * read either, but the rwlock doesn't record + * enough information to make that assertion. + */ + ASSERT(!RW_WRITE_HELD(&dn->dn_struct_rwlock)); + mutex_enter(&dn->dn_mtx); if (dn->dn_assigned_txg == tx->tx_txg - 1) { mutex_exit(&dn->dn_mtx); @@ -1319,7 +1338,10 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) object = sa_handle_object(hdl); - dmu_tx_hold_bonus(tx, object); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; + DB_DNODE_ENTER(db); + dmu_tx_hold_bonus_by_dnode(tx, DB_DNODE(db)); + DB_DNODE_EXIT(db); if (tx->tx_objset->os_sa->sa_master_obj == 0) return; @@ -1341,7 +1363,6 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) ASSERT(tx->tx_txg == 0); dmu_tx_hold_spill(tx, object); } else { - dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; dnode_t *dn; DB_DNODE_ENTER(db); diff --git a/module/zfs/dmu_zfetch.c index 364e4d7aa867..46dc4627cf3a 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -223,7 +223,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) * can only read from blocks that we carefully ensure are on * concrete vdevs (or previously-loaded indirect vdevs). So we * can't allow the predictive prefetcher to attempt reads of other - * blocks (e.g. of the MOS's dnode obejct). + * blocks (e.g. of the MOS's dnode object). */ if (!spa_indirect_vdevs_loaded(spa)) return; diff --git a/module/zfs/dnode.c index c06f614e1993..7acfc36c87d0 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -55,7 +55,6 @@ dnode_stats_t dnode_stats = { { "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 }, { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 }, - { "dnode_hold_free_txg", KSTAT_DATA_UINT64 }, { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_allocate", KSTAT_DATA_UINT64 }, { "dnode_reallocate", KSTAT_DATA_UINT64 }, @@ -390,6 +389,14 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx) rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ASSERT3U(newsize, <=, DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) - (dn->dn_nblkptr-1) * sizeof (blkptr_t)); + + if (newsize < dn->dn_bonuslen) { + /* clear any data after the end of the new size */ + size_t diff = dn->dn_bonuslen - newsize; + char *data_end = ((char *)dn->dn_bonus->db.db_data) + newsize; + bzero(data_end, diff); + } + dn->dn_bonuslen = newsize; if (newsize == 0) dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN; @@ -1255,6 +1262,10 @@ dnode_buf_evict_async(void *dbu) * as an extra dnode slot by a large dnode, in which case it returns * ENOENT. * + * If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just + * return whether the hold would succeed or not. tag and dnp should be set to + * NULL in this case. + * * errors: * EINVAL - Invalid object number or flags.
* ENOSPC - Hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE) @@ -1283,6 +1294,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0)); ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0)); + IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL)); /* * If you are holding the spa config lock as writer, you shouldn't @@ -1312,8 +1324,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) return (SET_ERROR(EEXIST)); DNODE_VERIFY(dn); - (void) zfs_refcount_add(&dn->dn_holds, tag); - *dnp = dn; + /* Don't actually hold if dry run, just return 0 */ + if (!(flag & DNODE_DRY_RUN)) { + (void) zfs_refcount_add(&dn->dn_holds, tag); + *dnp = dn; + } return (0); } @@ -1455,6 +1470,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(ENOENT)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + DNODE_STAT_BUMP(dnode_hold_alloc_hits); } else if (flag & DNODE_MUST_BE_FREE) { @@ -1512,6 +1535,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EEXIST)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR); DNODE_STAT_BUMP(dnode_hold_free_hits); } else { @@ -1519,15 +1550,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EINVAL)); } - if (dn->dn_free_txg) { - DNODE_STAT_BUMP(dnode_hold_free_txg); - type = dn->dn_type; - mutex_exit(&dn->dn_mtx); - dnode_slots_rele(dnc, idx, slots); - dbuf_rele(db, FTAG); - return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ? - ENOENT : EEXIST)); - } + ASSERT0(dn->dn_free_txg); if (zfs_refcount_add(&dn->dn_holds, tag) == 1) dbuf_add_ref(db, dnh); @@ -1538,6 +1561,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, dnode_slots_rele(dnc, idx, slots); DNODE_VERIFY(dn); + ASSERT3P(dnp, !=, NULL); ASSERT3P(dn->dn_dbuf, ==, db); ASSERT3U(dn->dn_object, ==, object); dbuf_rele(db, FTAG); @@ -1618,6 +1642,16 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) } } +/* + * Test whether we can create a dnode at the specified location. 
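 + * + * This is a dry-run form of dnode_hold_impl(): with DNODE_DRY_RUN set, no + * hold is taken and nothing is returned through tag or dnp, so a return of + * 0 means only that the requested slots were free at the time of the check.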
+ */ +int +dnode_try_claim(objset_t *os, uint64_t object, int slots) +{ + return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN, + slots, NULL, NULL)); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { @@ -1754,7 +1788,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) dn->dn_indblkshift = ibs; dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; } - /* rele after we have fixed the blocksize in the dnode */ + /* release after we have fixed the blocksize in the dnode */ if (db) dbuf_rele(db, FTAG); @@ -2483,3 +2517,13 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, return (error); } + +#if defined(_KERNEL) +EXPORT_SYMBOL(dnode_hold); +EXPORT_SYMBOL(dnode_rele); +EXPORT_SYMBOL(dnode_set_nlevels); +EXPORT_SYMBOL(dnode_set_blksz); +EXPORT_SYMBOL(dnode_free_range); +EXPORT_SYMBOL(dnode_evict_dbufs); +EXPORT_SYMBOL(dnode_evict_bonus); +#endif diff --git a/module/zfs/dnode_sync.c index 581f812a14d1..d3acf1baaeaa 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -384,7 +384,21 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, } } - if (trunc) { + /* + * Do not truncate the maxblkid if we are performing a raw + * receive. The raw receive sets the maxblkid manually and + * must not be overridden. Usually, the last DRR_FREE record + * will be at the maxblkid, because the source system sets + * the maxblkid when truncating. However, if the last block + * was freed by overwriting with zeros and being compressed + * away to a hole, the source system will generate a DRR_FREE + * record while leaving the maxblkid after the end of that + * record. In this case we need to leave the maxblkid as + * indicated in the DRR_OBJECT record, so that it matches the + * source system, ensuring that the cryptographic hashes will + * match. + */ + if (trunc && !dn->dn_objset->os_raw_receive) { ASSERTV(uint64_t off); dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1; diff --git a/module/zfs/dsl_bookmark.c index a32198402f4b..01362e0ad28d 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -84,7 +84,7 @@ dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname, } /* - * If later_ds is non-NULL, this will return EXDEV if the the specified bookmark + * If later_ds is non-NULL, this will return EXDEV if the specified bookmark * does not represent an earlier point in later_ds's timeline. * * Returns ENOENT if the dataset containing the bookmark does not exist. diff --git a/module/zfs/dsl_crypt.c index 21db8e51ffd0..162a3613c282 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -227,7 +227,7 @@ dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, goto error; } - /* if the user asked for the deault crypt, determine that now */ + /* if the user asked for the default crypt, determine that now */ if (dcp->cp_crypt == ZIO_CRYPT_ON) dcp->cp_crypt = ZIO_CRYPT_ON_VALUE; @@ -1418,11 +1418,19 @@ spa_keystore_change_key_check(void *arg, dmu_tx_t *tx) return (ret); } - +/* + * This function deals with the intricacies of updating wrapping + * key references and encryption roots recursively in the event + * of a call to 'zfs change-key' or 'zfs promote'. The 'skip' + * parameter should always be set to B_FALSE when called + * externally.
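+ * Internally, 'skip' is passed as B_TRUE only when recursing into a + * clone, whose DSL Crypto Key is shared with its origin and has + * therefore already been updated.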
+ */ static void spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, - uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) + uint64_t new_rddobj, dsl_wrapping_key_t *wkey, boolean_t skip, + dmu_tx_t *tx) { + int ret; zap_cursor_t *zc; zap_attribute_t *za; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1435,18 +1443,21 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, /* hold the dd */ VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); - /* ignore hidden dsl dirs */ + /* ignore special dsl dirs */ if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') { dsl_dir_rele(dd, FTAG); return; } + ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); + VERIFY(ret == 0 || ret == ENOENT); + /* * Stop recursing if this dsl dir didn't inherit from the root * or if this dd is a clone. */ - VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj)); - if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) { + if (ret == ENOENT || + (!skip && (curr_rddobj != rddobj || dsl_dir_is_clone(dd)))) { dsl_dir_rele(dd, FTAG); return; } @@ -1454,19 +1465,23 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, /* * If we don't have a wrapping key just update the dck to reflect the * new encryption root. Otherwise rewrap the entire dck and re-sync it - * to disk. + * to disk. If skip is set, we don't do any of this work. */ - if (wkey == NULL) { - VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj, - DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx)); - } else { - VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, - FTAG, &dck)); - dsl_wrapping_key_hold(wkey, dck); - dsl_wrapping_key_rele(dck->dck_wkey, dck); - dck->dck_wkey = wkey; - dsl_crypto_key_sync(dck, tx); - spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + if (!skip) { + if (wkey == NULL) { + VERIFY0(zap_update(dp->dp_meta_objset, + dd->dd_crypto_obj, + DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, + &new_rddobj, tx)); + } else { + VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, + FTAG, &dck)); + dsl_wrapping_key_hold(wkey, dck); + dsl_wrapping_key_rele(dck->dck_wkey, dck); + dck->dck_wkey = wkey; + dsl_crypto_key_sync(dck, tx); + spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + } } zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); @@ -1478,7 +1493,27 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { spa_keystore_change_key_sync_impl(rddobj, - za->za_first_integer, new_rddobj, wkey, tx); + za->za_first_integer, new_rddobj, wkey, B_FALSE, tx); + } + zap_cursor_fini(zc); + + /* + * Recurse into all dsl dirs of clones. We utilize the skip parameter + * here so that we don't attempt to process the clones directly. This + * is because the clone and its origin share the same dck, which has + * already been updated. 
+ */ + for (zap_cursor_init(zc, dp->dp_meta_objset, + dsl_dir_phys(dd)->dd_clones); + zap_cursor_retrieve(zc, za) == 0; + zap_cursor_advance(zc)) { + dsl_dataset_t *clone; + + VERIFY0(dsl_dataset_hold_obj(dp, za->za_first_integer, + FTAG, &clone)); + spa_keystore_change_key_sync_impl(rddobj, + clone->ds_dir->dd_object, new_rddobj, wkey, B_TRUE, tx); + dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(zc); @@ -1558,7 +1593,7 @@ spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx) /* recurse through all children and rewrap their keys */ spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object, - new_rddobj, wkey, tx); + new_rddobj, wkey, B_FALSE, tx); /* * All references to the old wkey should be released now (if it @@ -1596,7 +1631,7 @@ spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp) /* * Perform the actual work in syncing context. The blocks modified * here could be calculated but it would require holding the pool - * lock and tarversing all of the datasets that will have their keys + * lock and traversing all of the datasets that will have their keys * changed. */ return (dsl_sync_task(dsname, spa_keystore_change_key_check, @@ -1610,15 +1645,8 @@ dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) int ret; uint64_t curr_rddobj, parent_rddobj; - if (dd->dd_crypto_obj == 0) { - /* children of encrypted parents must be encrypted */ - if (newparent->dd_crypto_obj != 0) { - ret = SET_ERROR(EACCES); - goto error; - } - + if (dd->dd_crypto_obj == 0) return (0); - } ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); if (ret != 0) @@ -1683,11 +1711,15 @@ dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) * Check that the parent of the target has the same encryption root. */ ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj); - if (ret != 0) + if (ret == ENOENT) + return (SET_ERROR(EACCES)); + else if (ret != 0) return (ret); ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj); - if (ret != 0) + if (ret == ENOENT) + return (SET_ERROR(EACCES)); + else if (ret != 0) return (ret); if (op_rddobj != tp_rddobj) @@ -1717,7 +1749,7 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, return; /* - * If the target is being promoted to the encyrption root update the + * If the target is being promoted to the encryption root update the * DSL Crypto Key and keylocation to reflect that. We also need to * update the DSL Crypto Keys of all children inheriting their * encryption root to point to the new target. Otherwise, the check @@ -1739,7 +1771,7 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER); spa_keystore_change_key_sync_impl(rddobj, origin->dd_object, - target->dd_object, NULL, tx); + target->dd_object, NULL, B_FALSE, tx); rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock); dsl_dataset_rele(targetds, FTAG); @@ -1747,34 +1779,6 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, kmem_free(keylocation, ZAP_MAXVALUELEN); } -int -dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd) -{ - int ret; - uint64_t pcrypt, crypt; - - /* - * Check that we are not making an unencrypted child of an - * encrypted parent.
- */ - ret = dsl_dir_get_crypt(parentdd, &pcrypt); - if (ret != 0) - return (ret); - - ret = dsl_dir_get_crypt(origindd, &crypt); - if (ret != 0) - return (ret); - - ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); - ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); - - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) - return (SET_ERROR(EINVAL)); - - return (0); -} - - int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt) @@ -1805,13 +1809,6 @@ dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); - /* - * We can't create an unencrypted child of an encrypted parent - * under any circumstances. - */ - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) - return (SET_ERROR(EINVAL)); - /* check for valid dcp with no encryption (inherited or local) */ if (crypt == ZIO_CRYPT_OFF) { /* Must not specify encryption params */ @@ -2662,11 +2659,13 @@ dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv) } if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { - VERIFY0(dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, - &enc_root)); - dsl_dir_name(enc_root, buf); - dsl_dir_rele(enc_root, FTAG); - dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); + if (dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, + &enc_root) == 0) { + dsl_dir_name(enc_root, buf); + dsl_dir_rele(enc_root, FTAG); + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_ENCRYPTION_ROOT, buf); + } } } diff --git a/module/zfs/dsl_dataset.c index 966c2cc93d13..33b8cafbfb61 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -363,7 +363,7 @@ load_zfeature(objset_t *mos, dsl_dataset_t *ds, spa_feature_t f) } /* - * We have to release the fsid syncronously or we risk that a subsequent + * We have to release the fsid synchronously or we risk that a subsequent * mount of the same dataset will fail to unique_insert the fsid. This * failure would manifest itself as the fsid of this dataset changing * between mounts which makes NFS clients quite unhappy. @@ -2076,7 +2076,7 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) * We use nvlist_alloc() instead of fnvlist_alloc() because the * latter would allocate the list with NV_UNIQUE_NAME flag. * As a result, every time a clone name is appended to the list - * it would be (linearly) searched for for a duplicate name. + * it would be (linearly) searched for a duplicate name. * We already know that all clone names must be unique and we * want to avoid the quadratic complexity of double-checking that * because we can have a large number of clones. @@ -2404,7 +2404,7 @@ dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, int error; dsl_pool_t *dp = ds->ds_dir->dd_pool; - /* Retrieve the mountpoint value stored in the zap opbject */ + /* Retrieve the mountpoint value stored in the zap object */ error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1, ZAP_MAXVALUELEN, value, source); if (error != 0) { @@ -3635,7 +3635,7 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, * The clone can't be too much over the head's refquota. * * To ensure that the entire refquota can be used, we allow one - * transaction to exceed the the refquota. Therefore, this check + * transaction to exceed the refquota. Therefore, this check * needs to also allow for the space referenced to be more than the * refquota.
The maximum amount of space that one transaction can use * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this diff --git a/module/zfs/dsl_destroy.c index 465b3dfac890..ede54d9092d6 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -632,7 +632,7 @@ dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, /* * lzc_destroy_snaps() is documented to fill the errlist with - * int32 values, so we need to covert the int64 values that are + * int32 values, so we need to convert the int64 values that are * returned from LUA. */ int rv = 0; @@ -1059,9 +1059,10 @@ dsl_destroy_head(const char *name) /* * Head deletion is processed in one txg on old pools; * remove the objects from open context so that the txg sync - * is not too long. + * is not too long. This optimization can only work for + * encrypted datasets if the wrapping key is loaded. */ - error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_TRUE, FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = @@ -1073,7 +1074,7 @@ dsl_destroy_head(const char *name) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); - dmu_objset_disown(os, B_FALSE, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); } } diff --git a/module/zfs/dsl_dir.c index 6fb711f592c2..724f80ff3f5d 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -96,7 +96,7 @@ * limit set. If there is a limit at any initialized level up the tree, the * check must pass or the creation will fail. Likewise, when a filesystem or * snapshot is destroyed, the counts are recursively adjusted all the way up - * the initizized nodes in the tree. Renaming a filesystem into different point + * the initialized nodes in the tree. Renaming a filesystem into a different point * in the tree will first validate, then update the counts on each branch up to * the common ancestor. A receive will also validate the counts and then update * them. @@ -1495,7 +1495,7 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) * less than the amount specified. * * NOTE: The behavior of this function is identical to the Illumos / FreeBSD - * version however it has been adjusted to use an iterative rather then + * version however it has been adjusted to use an iterative rather than * recursive algorithm to minimize stack usage. */ void diff --git a/module/zfs/dsl_pool.c index 10e967ab91ed..d8cf4d209e17 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -659,15 +659,6 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) } VERIFY0(zio_wait(zio)); - /* - * We have written all of the accounted dirty data, so our - * dp_space_towrite should now be zero. However, some seldom-used - * code paths do not adhere to this (e.g. dbuf_undirty(), also - * rounding error in dbuf_write_physdone). - * Shore up the accounting of any dirtied space now. - */ - dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); - /* * Update the long range free counter after * we're done syncing user data @@ -762,6 +753,21 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) dsl_pool_sync_mos(dp, tx); } + /* + * We have written all of the accounted dirty data, so our + * dp_space_towrite should now be zero. However, some seldom-used + * code paths do not adhere to this (e.g. dbuf_undirty()). Shore up + * the accounting of any dirtied space now.
+ * + * Note that, besides any dirty data from datasets, the amount of + * dirty data in the MOS is also accounted by the pool. Therefore, + * we want to do this cleanup after dsl_pool_sync_mos() so we don't + * attempt to update the accounting for the same dirty data twice. + * (i.e. at this point we only update the accounting for the space + * that we know that we "leaked"). + */ + dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); + /* * If we modify a dataset in the same txg that we want to destroy it, * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. @@ -889,14 +895,14 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp) zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; uint64_t dirty_min_bytes = zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100; - boolean_t rv; + uint64_t dirty; mutex_enter(&dp->dp_lock); - if (dp->dp_dirty_total > dirty_min_bytes) - txg_kick(dp); - rv = (dp->dp_dirty_total > delay_min_bytes); + dirty = dp->dp_dirty_total; mutex_exit(&dp->dp_lock); - return (rv); + if (dirty > dirty_min_bytes) + txg_kick(dp); + return (dirty > delay_min_bytes); } void diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 9f892acdbf80..784a7308b088 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -649,7 +649,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t zapobj, intval, dummy; + uint64_t zapobj, intval, dummy, count; int isint; char valbuf[32]; const char *valstr = NULL; @@ -663,7 +663,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, if (ds->ds_is_snapshot) { ASSERT(version >= SPA_VERSION_SNAP_PROPS); - if (dsl_dataset_phys(ds)->ds_props_obj == 0) { + if (dsl_dataset_phys(ds)->ds_props_obj == 0 && + (source & ZPROP_SRC_NONE) == 0) { dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_props_obj = zap_create(mos, @@ -674,6 +675,10 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, zapobj = dsl_dir_phys(ds->ds_dir)->dd_props_zapobj; } + /* If we are removing objects from a non-existent ZAP just return */ + if (zapobj == 0) + return; + if (version < SPA_VERSION_RECVD_PROPS) { if (source & ZPROP_SRC_NONE) source = ZPROP_SRC_NONE; @@ -755,6 +760,18 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, strfree(inheritstr); strfree(recvdstr); + /* + * If we are left with an empty snap zap we can destroy it. + * This will prevent unnecessary calls to zap_lookup() in + * the "zfs list" and "zfs get" code paths. + */ + if (ds->ds_is_snapshot && + zap_count(mos, zapobj, &count) == 0 && count == 0) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_dataset_phys(ds)->ds_props_obj = 0; + zap_destroy(mos, zapobj, tx); + } + if (isint) { VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval)); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index eee122aa6d28..d71113681236 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -22,8 +22,8 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2016 Gary Mills - * Copyright (c) 2017 Datto Inc. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. 
*/ #include @@ -591,6 +591,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx) scn->scn_restart_txg <= tx->tx_txg); } +boolean_t +dsl_scan_resilver_scheduled(dsl_pool_t *dp) +{ + return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) || + (spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER)); +} + boolean_t dsl_scan_scrubbing(const dsl_pool_t *dp) { @@ -786,7 +793,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) (void) spa_vdev_state_exit(spa, NULL, 0); if (func == POOL_SCAN_RESILVER) { - dsl_resilver_restart(spa->spa_dsl_pool, 0); + dsl_scan_restart_resilver(spa->spa_dsl_pool, 0); return (0); } @@ -806,41 +813,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } -/* - * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns - * B_TRUE if we have devices that need to be resilvered and are available to - * accept resilver I/Os. - */ -static boolean_t -dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx) -{ - boolean_t resilver_needed = B_FALSE; - spa_t *spa = vd->vdev_spa; - - for (int c = 0; c < vd->vdev_children; c++) { - resilver_needed |= - dsl_scan_clear_deferred(vd->vdev_child[c], tx); - } - - if (vd == spa->spa_root_vdev && - spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) { - spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx); - vdev_config_dirty(vd); - spa->spa_resilver_deferred = B_FALSE; - return (resilver_needed); - } - - if (!vdev_is_concrete(vd) || vd->vdev_aux || - !vd->vdev_ops->vdev_op_leaf) - return (resilver_needed); - - if (vd->vdev_resilver_deferred) - vd->vdev_resilver_deferred = B_FALSE; - - return (!vdev_is_dead(vd) && !vd->vdev_offline && - vdev_resilver_needed(vd, NULL, NULL)); -} - /* ARGSUSED */ static void dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) @@ -943,21 +915,20 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); /* - * Clear any deferred_resilver flags in the config. + * Clear any resilver_deferred flags in the config. * If there are drives that need resilvering, kick * off an asynchronous request to start resilver. - * dsl_scan_clear_deferred() may update the config + * vdev_clear_resilver_deferred() may update the config * before the resilver can restart. In the event of * a crash during this period, the spa loading code * will find the drives that need to be resilvered - * when the machine reboots and start the resilver then. + * and start the resilver then. */ - boolean_t resilver_needed = - dsl_scan_clear_deferred(spa->spa_root_vdev, tx); - if (resilver_needed) { + if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) && + vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) { spa_history_log_internal(spa, - "starting deferred resilver", tx, - "errors=%llu", spa_get_errlog_size(spa)); + "starting deferred resilver", tx, "errors=%llu", + (u_longlong_t)spa_get_errlog_size(spa)); spa_async_request(spa, SPA_ASYNC_RESILVER); } } @@ -1068,7 +1039,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) /* start a new scan, or restart an existing one. */ void -dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) +dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg) { if (txg == 0) { dmu_tx_t *tx; @@ -1272,8 +1243,8 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ - /* We only know how to resume from level-0 blocks. 
*/ - if (zb && zb->zb_level != 0) + /* We only know how to resume from level-0 and objset blocks. */ + if (zb && (zb->zb_level != 0 && zb->zb_level != ZB_ROOT_LEVEL)) return (B_FALSE); /* @@ -1304,7 +1275,16 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || spa_shutting_down(scn->scn_dp->dp_spa) || (zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn))) { - if (zb) { + if (zb && zb->zb_level == ZB_ROOT_LEVEL) { + dprintf("suspending at first available bookmark " + "%llx/%llx/%llx/%llx\n", + (longlong_t)zb->zb_objset, + (longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (longlong_t)zb->zb_blkid); + SET_BOOKMARK(&scn->scn_phys.scn_bookmark, + zb->zb_objset, 0, 0, 0); + } else if (zb != NULL) { dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, @@ -1908,7 +1888,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, /* * This debugging is commented out to conserve stack space. This - * function is called recursively and the debugging addes several + * function is called recursively and the debugging adds several * bytes to the stack for each call. It can be commented back in * if required to debug an issue in dsl_scan_visitbp(). * @@ -2165,16 +2145,17 @@ ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2, } /* - * Called when a parent dataset and its clone are swapped. If we were + * Called when an origin dataset and its clone are swapped. If we were * currently traversing the dataset, we need to switch to traversing the - * newly promoted parent. + * newly promoted clone. */ void dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) { dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; - uint64_t mintxg; + uint64_t mintxg1, mintxg2; + boolean_t ds1_queued, ds2_queued; if (!dsl_scan_is_running(scn)) return; @@ -2182,44 +2163,81 @@ dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); - if (scan_ds_queue_contains(scn, ds1->ds_object, &mintxg)) { - scan_ds_queue_remove(scn, ds1->ds_object); - scan_ds_queue_insert(scn, ds2->ds_object, mintxg); + /* + * Handle the in-memory scan queue. + */ + ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1); + ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2); + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } - if (scan_ds_queue_contains(scn, ds2->ds_object, &mintxg)) { + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * The swapping code below would not handle this case correctly, + * since we can't insert ds2 if it is already there. That's + * because scan_ds_queue_insert() prohibits a duplicate insert + * and panics. 
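+ * Leaving both entries alone is safe: the sanity checks above + * show that ds1 and ds2 share the same mintxg + * (ds_prev_snap_txg), so the existing queue entries remain + * correct after the swap.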
+ */ + } else if (ds1_queued) { + scan_ds_queue_remove(scn, ds1->ds_object); + scan_ds_queue_insert(scn, ds2->ds_object, mintxg1); + } else if (ds2_queued) { scan_ds_queue_remove(scn, ds2->ds_object); - scan_ds_queue_insert(scn, ds1->ds_object, mintxg); + scan_ds_queue_insert(scn, ds1->ds_object, mintxg2); } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds1->ds_object, &mintxg) == 0) { - int err; - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + /* + * Handle the on-disk scan queue. + * The on-disk state is an out-of-date version of the in-memory state, + * so the in-memory and on-disk values for ds1_queued and ds2_queued may + * be different. Therefore we need to apply the swap logic to the + * on-disk state independently of the in-memory state. + */ + ds1_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0; + ds2_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0; + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * Alternatively, we could check for EEXIST from + * zap_add_int_key() and back out to the original state, but + * that would be more work than checking for this case upfront. + */ + } else if (ds1_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); - err = zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); - VERIFY(err == 0 || err == EEXIST); - if (err == EEXIST) { - /* Both were there to begin with */ - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, - ds1->ds_object, mintxg, tx)); - } + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); - } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds2->ds_object, &mintxg) == 0) { - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + } else if (ds2_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds2->ds_object, @@ -3025,8 +3043,10 @@ dsl_scan_async_block_should_pause(dsl_scan_t *scn) if (zfs_recover) return (B_FALSE); - if (scn->scn_visited_this_txg >= zfs_async_block_max_blocks) + if (zfs_async_block_max_blocks != 0 && + scn->scn_visited_this_txg >= zfs_async_block_max_blocks) { return (B_TRUE); + } elapsed_nanosecs = gethrtime() - 
scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || @@ -3348,7 +3368,7 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx) /* * This is the primary entry point for scans that is called from syncing * context. Scans must happen entirely during syncing context so that we - * cna guarantee that blocks we are currently scanning will not change out + * can guarantee that blocks we are currently scanning will not change out * from under us. While a scan is active, this function controls how quickly * transaction groups proceed, instead of the normal handling provided by * txg_sync_thread(). @@ -3629,6 +3649,13 @@ count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp) { int i; + /* + * Don't count embedded bp's, since we already did the work of + * scanning these when we scanned the containing block. + */ + if (BP_IS_EMBEDDED(bp)) + return; + /* * Update the spa's stats on how many bytes we have issued. * Sequential scrubs create a zio for each DVA of the bp. Each @@ -3945,7 +3972,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags, * As can be seen, at fill_ratio=3, the algorithm is slightly biased towards * extents that are more completely filled (in a 3:2 ratio) vs just larger. * Note that as an optimization, we replace multiplication and division by - * 100 with bitshifting by 7 (which effecitvely multiplies and divides by 128). + * 100 with bitshifting by 7 (which effectively multiplies and divides by 128). */ static int ext_size_compare(const void *x, const void *y) @@ -4173,6 +4200,36 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp) dsl_scan_freed_dva(spa, bp, i); } +/* + * Check if a vdev needs resilvering (non-empty DTL); if so, and a resilver has + * not already started, start it. Otherwise, only restart if max txg in DTL range is + * greater than the max txg in the current scan. If the DTL max is less than + * the scan max, then the vdev has not missed any new data since the resilver + * started, so a restart is not needed.
+ */
+void
+dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
+{
+	uint64_t min, max;
+
+	if (!vdev_resilver_needed(vd, &min, &max))
+		return;
+
+	if (!dsl_scan_resilvering(dp)) {
+		spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
+		return;
+	}
+
+	if (max <= dp->dp_scan->scn_phys.scn_max_txg)
+		return;
+
+	/* restart is needed, check if it can be deferred */
+	if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
+		vdev_defer_resilver(vd);
+	else
+		spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
+}
+
 #if defined(_KERNEL)
 /* CSTYLED */
 module_param(zfs_scan_vdev_limit, ulong, 0644);
diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c
index b63ce5cad90c..2d6ca8549eb9 100644
--- a/module/zfs/dsl_synctask.c
+++ b/module/zfs/dsl_synctask.c
@@ -41,7 +41,7 @@ dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
 
 static int
 dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc,
-    dsl_syncfunc_t *syncfunc, void *arg,
+    dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
     int blocks_modified, zfs_space_check_t space_check, boolean_t early)
 {
 	spa_t *spa;
@@ -85,6 +85,11 @@ dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc,
 
 	dmu_tx_commit(tx);
 
+	if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) {
+		/* current contract is to call func once */
+		sigfunc(arg, tx);
+		sigfunc = NULL;	/* in case we're performing an EAGAIN retry */
+	}
 	txg_wait_synced(dp, dst.dst_txg);
 
 	if (dst.dst_error == EAGAIN) {
@@ -124,7 +129,7 @@ dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
     dsl_syncfunc_t *syncfunc, void *arg,
     int blocks_modified, zfs_space_check_t space_check)
 {
-	return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg,
+	return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
 	    blocks_modified, space_check, B_FALSE));
 }
 
@@ -138,7 +143,7 @@ dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
  * For that reason, early synctasks can affect the process of writing dirty
  * changes to disk for the txg that they run and should be used with caution.
  * In addition, early synctasks should not dirty any metaslabs as this would
- * invalidate the precodition/invariant for subsequent early synctasks.
+ * invalidate the precondition/invariant for subsequent early synctasks.
  * [see dsl_pool_sync() and dsl_early_sync_task_verify()]
  */
 int
@@ -146,10 +151,23 @@ dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
     dsl_syncfunc_t *syncfunc, void *arg,
     int blocks_modified, zfs_space_check_t space_check)
 {
-	return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg,
+	return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
 	    blocks_modified, space_check, B_TRUE));
 }
 
+/*
+ * A standard synctask that can be interrupted by a signal. The sigfunc
+ * is called once if a signal occurs while waiting for the task to sync.
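For context, here is how a caller might use the dsl_sync_task_sig() wrapper defined just below. Everything except the dsl_sync_task_sig() signature itself (the args struct and the example_* callbacks) is hypothetical, and the fragment assumes the usual ZFS kernel headers:

typedef struct example_args {
	const char *ea_name;		/* hypothetical payload */
} example_args_t;

static int
example_check(void *arg, dmu_tx_t *tx)
{
	return (0);			/* validate in open context */
}

static void
example_sync(void *arg, dmu_tx_t *tx)
{
	/* apply the change in syncing context */
}

static void
example_sig(void *arg, dmu_tx_t *tx)
{
	/*
	 * Runs at most once, only if the thread waiting in
	 * txg_wait_synced_sig() catches a signal. The synctask itself
	 * still completes; this is a chance to notify or clean up on
	 * behalf of the interrupted caller.
	 */
}

	/* ... in the caller: */
	example_args_t ea = { .ea_name = "tank/fs" };
	int error = dsl_sync_task_sig("tank", example_check, example_sync,
	    example_sig, &ea, 0, ZFS_SPACE_CHECK_NONE);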
+ */ +int +dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, + int blocks_modified, zfs_space_check_t space_check) +{ + return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg, + blocks_modified, space_check, B_FALSE)); +} + static void dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx, diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index 638805d0b92b..2b2182fadec5 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -302,7 +302,7 @@ dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) * holds is nvl of snapname -> holdname * errlist will be filled in with snapname -> error * - * The snaphosts must all be in the same pool. + * The snapshots must all be in the same pool. * * Holds for snapshots that don't exist will be skipped. * @@ -556,9 +556,9 @@ dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) * errlist will be filled in with snapname -> error * * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots, - * otherwise they should be the names of shapshots. + * otherwise they should be the names of snapshots. * - * As a release may cause snapshots to be destroyed this trys to ensure they + * As a release may cause snapshots to be destroyed this tries to ensure they * aren't mounted. * * The release of non-existent holds are skipped. diff --git a/module/zfs/fm.c b/module/zfs/fm.c index cc5225dcbbef..98a844820b3a 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -31,7 +31,7 @@ * Name-Value Pair Lists * * The embodiment of an FMA protocol element (event, fmri or authority) is a - * name-value pair list (nvlist_t). FMA-specific nvlist construtor and + * name-value pair list (nvlist_t). FMA-specific nvlist constructor and * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used * to create an nvpair list using custom allocators. Callers may choose to * allocate either from the kernel memory allocator, or from a preallocated @@ -683,8 +683,7 @@ zfs_zevent_wait(zfs_zevent_t *ze) break; } - error = cv_timedwait_sig(&zevent_cv, &zevent_lock, - ddi_get_lbolt() + MSEC_TO_TICK(10)); + error = cv_wait_sig(&zevent_cv, &zevent_lock); if (signal_pending(current)) { error = SET_ERROR(EINTR); break; @@ -785,7 +784,7 @@ zfs_zevent_destroy(zfs_zevent_t *ze) #endif /* _KERNEL */ /* - * Wrapppers for FM nvlist allocators + * Wrappers for FM nvlist allocators */ /* ARGSUSED */ static void * diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index ec89810b48ab..faa175b7e7aa 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -103,12 +103,27 @@ int zfs_mg_noalloc_threshold = 0; /* * Metaslab groups are considered eligible for allocations if their - * fragmenation metric (measured as a percentage) is less than or equal to - * zfs_mg_fragmentation_threshold. If a metaslab group exceeds this threshold - * then it will be skipped unless all metaslab groups within the metaslab - * class have also crossed this threshold. 
+ * fragmentation metric (measured as a percentage) is less than or
+ * equal to zfs_mg_fragmentation_threshold. If a metaslab group
+ * exceeds this threshold then it will be skipped unless all metaslab
+ * groups within the metaslab class have also crossed this threshold.
+ *
+ * This tunable was introduced to avoid edge cases where we continue
+ * allocating from very fragmented disks in our pool while other, less
+ * fragmented disks exist. On the other hand, if all disks in the
+ * pool are uniformly approaching the threshold, the threshold can
+ * be a speed bump in performance, where we keep switching the disks
+ * that we allocate from (e.g. we allocate some segments from disk A,
+ * pushing it past the threshold, while freeing segments from disk
+ * B brings its fragmentation below the threshold).
+ *
+ * Empirically, we've seen that our vdev selection for allocations is
+ * good enough that fragmentation increases uniformly across all vdevs
+ * the majority of the time. Thus we set the threshold percentage high
+ * enough to avoid hitting the speed bump on pools that are being pushed
+ * to the edge.
  */
-int zfs_mg_fragmentation_threshold = 85;
+int zfs_mg_fragmentation_threshold = 95;
 
 /*
  * Allow metaslabs to keep their active state as long as their fragmentation
@@ -144,6 +159,30 @@ uint64_t metaslab_df_alloc_threshold = SPA_OLD_MAXBLOCKSIZE;
  */
 int metaslab_df_free_pct = 4;
 
+/*
+ * Maximum distance to search forward from the last offset. Without this
+ * limit, fragmented pools can see >100,000 iterations and
+ * metaslab_block_picker() becomes the performance limiting factor on
+ * high-performance storage.
+ *
+ * With the default setting of 16MB, we typically see less than 500
+ * iterations, even with very fragmented, ashift=9 pools. The maximum number
+ * of iterations possible is:
+ *	metaslab_df_max_search / (2 * (1<<ashift))
+ * With the default setting of 16MB this is 16*1024 (with ashift=9) or
+ * 2048 (with ashift=12).
+ */
+int metaslab_df_max_search = 16 * 1024 * 1024;
+
+/*
+ * If we are not searching forward (due to metaslab_df_max_search,
+ * metaslab_df_free_pct, or metaslab_df_alloc_threshold), this tunable
+ * controls what segment is used. If it is set, we will use the largest
+ * free segment. If it is not set, we will use a segment of exactly the
+ * requested size (or larger).
+ */
+int metaslab_df_use_largest_segment = B_FALSE;
+
 /*
  * Percentage of all cpus that can be used by the metaslab taskq.
  */
@@ -856,6 +896,7 @@ metaslab_group_sort_impl(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	ASSERT(MUTEX_HELD(&mg->mg_lock));
 	ASSERT(msp->ms_group == mg);
+
 	avl_remove(&mg->mg_metaslab_tree, msp);
 	msp->ms_weight = weight;
 	avl_add(&mg->mg_metaslab_tree, msp);
 
@@ -1185,8 +1226,7 @@ metaslab_block_find(avl_tree_t *t, uint64_t start, uint64_t size)
 	return (rs);
 }
 
-#if defined(WITH_FF_BLOCK_ALLOCATOR) || \
-    defined(WITH_DF_BLOCK_ALLOCATOR) || \
+#if defined(WITH_DF_BLOCK_ALLOCATOR) || \
     defined(WITH_CF_BLOCK_ALLOCATOR)
 /*
  * This is a helper function that can be used by the allocator to find
@@ -1195,13 +1235,16 @@ metaslab_block_find(avl_tree_t *t, uint64_t start, uint64_t size)
  */
 static uint64_t
 metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
-    uint64_t align)
+    uint64_t max_search)
 {
 	range_seg_t *rs = metaslab_block_find(t, *cursor, size);
+	uint64_t first_found;
 
-	while (rs != NULL) {
-		uint64_t offset = P2ROUNDUP(rs->rs_start, align);
+	if (rs != NULL)
+		first_found = rs->rs_start;
 
+	while (rs != NULL && rs->rs_start - first_found <= max_search) {
+		uint64_t offset = rs->rs_start;
 		if (offset + size <= rs->rs_end) {
 			*cursor = offset + size;
 			return (offset);
@@ -1209,55 +1252,30 @@ metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
 		rs = AVL_NEXT(t, rs);
 	}
 
-	/*
-	 * If we know we've searched the whole map (*cursor == 0), give up.
-	 * Otherwise, reset the cursor to the beginning and try again.
- */ - if (*cursor == 0) - return (-1ULL); - *cursor = 0; - return (metaslab_block_picker(t, cursor, size, align)); -} -#endif /* WITH_FF/DF/CF_BLOCK_ALLOCATOR */ - -#if defined(WITH_FF_BLOCK_ALLOCATOR) -/* - * ========================================================================== - * The first-fit block allocator - * ========================================================================== - */ -static uint64_t -metaslab_ff_alloc(metaslab_t *msp, uint64_t size) -{ - /* - * Find the largest power of 2 block size that evenly divides the - * requested size. This is used to try to allocate blocks with similar - * alignment from the same area of the metaslab (i.e. same cursor - * bucket) but it does not guarantee that other allocations sizes - * may exist in the same region. - */ - uint64_t align = size & -size; - uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1]; - avl_tree_t *t = &msp->ms_allocatable->rt_root; - - return (metaslab_block_picker(t, cursor, size, align)); + return (-1ULL); } - -static metaslab_ops_t metaslab_ff_ops = { - metaslab_ff_alloc -}; - -metaslab_ops_t *zfs_metaslab_ops = &metaslab_ff_ops; -#endif /* WITH_FF_BLOCK_ALLOCATOR */ +#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */ #if defined(WITH_DF_BLOCK_ALLOCATOR) /* * ========================================================================== - * Dynamic block allocator - - * Uses the first fit allocation scheme until space get low and then - * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold - * and metaslab_df_free_pct to determine when to switch the allocation scheme. + * Dynamic Fit (df) block allocator + * + * Search for a free chunk of at least this size, starting from the last + * offset (for this alignment of block) looking for up to + * metaslab_df_max_search bytes (16MB). If a large enough free chunk is not + * found within 16MB, then return a free chunk of exactly the requested size (or + * larger). + * + * If it seems like searching from the last offset will be unproductive, skip + * that and just return a free chunk of exactly the requested size (or larger). + * This is based on metaslab_df_alloc_threshold and metaslab_df_free_pct. This + * mechanism is probably not very useful and may be removed in the future. + * + * The behavior when not searching can be changed to return the largest free + * chunk, instead of a free chunk of exactly the requested size, by setting + * metaslab_df_use_largest_segment. * ========================================================================== */ static uint64_t @@ -1273,28 +1291,42 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size) uint64_t align = size & -size; uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1]; range_tree_t *rt = msp->ms_allocatable; - avl_tree_t *t = &rt->rt_root; - uint64_t max_size = metaslab_block_maxsize(msp); int free_pct = range_tree_space(rt) * 100 / msp->ms_size; + uint64_t offset; ASSERT(MUTEX_HELD(&msp->ms_lock)); - ASSERT3U(avl_numnodes(t), ==, + ASSERT3U(avl_numnodes(&rt->rt_root), ==, avl_numnodes(&msp->ms_allocatable_by_size)); - if (max_size < size) - return (-1ULL); - /* - * If we're running low on space switch to using the size - * sorted AVL tree (best-fit). + * If we're running low on space, find a segment based on size, + * rather than iterating based on offset. 
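The bounded forward search described above can be modeled in a few lines of userland C. This is a deliberately simplified sketch, not the kernel implementation: a plain sorted array stands in for the AVL range tree, and the tunables are passed as parameters:

#include <stdint.h>
#include <stddef.h>

typedef struct { uint64_t start, end; } seg_t;

/*
 * Return the start of the first free segment at or after *cursor that
 * fits 'size', giving up once the scan has moved 'max_search' bytes past
 * the first candidate, as metaslab_block_picker() now does.
 */
static uint64_t
picker_model(const seg_t *segs, size_t nsegs, uint64_t *cursor,
    uint64_t size, uint64_t max_search)
{
	uint64_t first_found = 0;
	int found_any = 0;

	for (size_t i = 0; i < nsegs; i++) {
		if (segs[i].end <= *cursor)
			continue;		/* behind the cursor */
		if (!found_any) {
			first_found = segs[i].start;
			found_any = 1;
		}
		if (segs[i].start - first_found > max_search)
			break;			/* searched far enough */
		if (segs[i].start + size <= segs[i].end) {
			*cursor = segs[i].start + size;
			return (segs[i].start);
		}
	}
	return (UINT64_MAX);	/* caller falls back to the size-sorted tree */
}

Bounding the scan trades a slightly worse fit for a hard cap on comparisons, which is the same trade the patch makes before falling back to the size-sorted tree.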
*/ - if (max_size < metaslab_df_alloc_threshold || + if (metaslab_block_maxsize(msp) < metaslab_df_alloc_threshold || free_pct < metaslab_df_free_pct) { - t = &msp->ms_allocatable_by_size; - *cursor = 0; + offset = -1; + } else { + offset = metaslab_block_picker(&rt->rt_root, + cursor, size, metaslab_df_max_search); + } + + if (offset == -1) { + range_seg_t *rs; + if (metaslab_df_use_largest_segment) { + /* use largest free segment */ + rs = avl_last(&msp->ms_allocatable_by_size); + } else { + /* use segment of this size, or next largest */ + rs = metaslab_block_find(&msp->ms_allocatable_by_size, + 0, size); + } + if (rs != NULL && rs->rs_start + size <= rs->rs_end) { + offset = rs->rs_start; + *cursor = offset + size; + } } - return (metaslab_block_picker(t, cursor, size, 1ULL)); + return (offset); } static metaslab_ops_t metaslab_df_ops = { @@ -1764,6 +1796,7 @@ metaslab_unload(metaslab_t *msp) range_tree_vacate(msp->ms_allocatable, NULL, NULL); msp->ms_loaded = B_FALSE; + msp->ms_activation_weight = 0; msp->ms_weight &= ~METASLAB_ACTIVE_MASK; msp->ms_max_size = 0; @@ -2294,11 +2327,10 @@ metaslab_segment_weight(metaslab_t *msp) boolean_t metaslab_should_allocate(metaslab_t *msp, uint64_t asize) { - boolean_t should_allocate; - if (msp->ms_max_size != 0) return (msp->ms_max_size >= asize); + boolean_t should_allocate; if (!WEIGHT_IS_SPACEBASED(msp->ms_weight)) { /* * The metaslab segment weight indicates segments in the @@ -2312,6 +2344,7 @@ metaslab_should_allocate(metaslab_t *msp, uint64_t asize) should_allocate = (asize <= (msp->ms_weight & ~METASLAB_WEIGHT_TYPE)); } + return (should_allocate); } static uint64_t @@ -2359,6 +2392,8 @@ metaslab_weight(metaslab_t *msp) void metaslab_recalculate_weight_and_sort(metaslab_t *msp) { + ASSERT(MUTEX_HELD(&msp->ms_lock)); + /* note: we preserve the mask (e.g. indication of primary, etc..) */ uint64_t was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; metaslab_group_sort(msp->ms_group, msp, @@ -2369,16 +2404,18 @@ static int metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp, int allocator, uint64_t activation_weight) { + ASSERT(MUTEX_HELD(&msp->ms_lock)); + /* * If we're activating for the claim code, we don't want to actually * set the metaslab up for a specific allocator. */ if (activation_weight == METASLAB_WEIGHT_CLAIM) return (0); + metaslab_t **arr = (activation_weight == METASLAB_WEIGHT_PRIMARY ? mg->mg_primaries : mg->mg_secondaries); - ASSERT(MUTEX_HELD(&msp->ms_lock)); mutex_enter(&mg->mg_lock); if (arr[allocator] != NULL) { mutex_exit(&mg->mg_lock); @@ -2399,28 +2436,77 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight) { ASSERT(MUTEX_HELD(&msp->ms_lock)); - if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { - int error = metaslab_load(msp); - if (error != 0) { - metaslab_group_sort(msp->ms_group, msp, 0); - return (error); - } - if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) { - /* - * The metaslab was activated for another allocator - * while we were waiting, we should reselect. - */ + /* + * The current metaslab is already activated for us so there + * is nothing to do. Already activated though, doesn't mean + * that this metaslab is activated for our allocator nor our + * requested activation weight. The metaslab could have started + * as an active one for our allocator but changed allocators + * while we were waiting to grab its ms_lock or we stole it + * [see find_valid_metaslab()]. 
This means that there is a + * possibility of passivating a metaslab of another allocator + * or from a different activation mask, from this thread. + */ + if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) { + ASSERT(msp->ms_loaded); + return (0); + } + + int error = metaslab_load(msp); + if (error != 0) { + metaslab_group_sort(msp->ms_group, msp, 0); + return (error); + } + + /* + * When entering metaslab_load() we may have dropped the + * ms_lock because we were loading this metaslab, or we + * were waiting for another thread to load it for us. In + * that scenario, we recheck the weight of the metaslab + * to see if it was activated by another thread. + * + * If the metaslab was activated for another allocator or + * it was activated with a different activation weight (e.g. + * we wanted to make it a primary but it was activated as + * secondary) we return error (EBUSY). + * + * If the metaslab was activated for the same allocator + * and requested activation mask, skip activating it. + */ + if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) { + if (msp->ms_allocator != allocator) + return (EBUSY); + + if ((msp->ms_weight & activation_weight) == 0) return (SET_ERROR(EBUSY)); - } - if ((error = metaslab_activate_allocator(msp->ms_group, msp, - allocator, activation_weight)) != 0) { - return (error); - } - msp->ms_activation_weight = msp->ms_weight; - metaslab_group_sort(msp->ms_group, msp, - msp->ms_weight | activation_weight); + EQUIV((activation_weight == METASLAB_WEIGHT_PRIMARY), + msp->ms_primary); + return (0); + } + + /* + * If the metaslab has literally 0 space, it will have weight 0. In + * that case, don't bother activating it. This can happen if the + * metaslab had space during find_valid_metaslab, but another thread + * loaded it and used all that space while we were waiting to grab the + * lock. 
+ */ + if (msp->ms_weight == 0) { + ASSERT0(range_tree_space(msp->ms_allocatable)); + return (SET_ERROR(ENOSPC)); + } + + if ((error = metaslab_activate_allocator(msp->ms_group, msp, + allocator, activation_weight)) != 0) { + return (error); } + + ASSERT0(msp->ms_activation_weight); + msp->ms_activation_weight = msp->ms_weight; + metaslab_group_sort(msp->ms_group, msp, + msp->ms_weight | activation_weight); + ASSERT(msp->ms_loaded); ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); @@ -2432,6 +2518,8 @@ metaslab_passivate_allocator(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) { ASSERT(MUTEX_HELD(&msp->ms_lock)); + ASSERT(msp->ms_loaded); + if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) { metaslab_group_sort(mg, msp, weight); return; @@ -2439,15 +2527,16 @@ metaslab_passivate_allocator(metaslab_group_t *mg, metaslab_t *msp, mutex_enter(&mg->mg_lock); ASSERT3P(msp->ms_group, ==, mg); + ASSERT3S(0, <=, msp->ms_allocator); + ASSERT3U(msp->ms_allocator, <, mg->mg_allocators); + if (msp->ms_primary) { - ASSERT3U(0, <=, msp->ms_allocator); - ASSERT3U(msp->ms_allocator, <, mg->mg_allocators); ASSERT3P(mg->mg_primaries[msp->ms_allocator], ==, msp); ASSERT(msp->ms_weight & METASLAB_WEIGHT_PRIMARY); mg->mg_primaries[msp->ms_allocator] = NULL; } else { - ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY); ASSERT3P(mg->mg_secondaries[msp->ms_allocator], ==, msp); + ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY); mg->mg_secondaries[msp->ms_allocator] = NULL; } msp->ms_allocator = -1; @@ -2470,9 +2559,10 @@ metaslab_passivate(metaslab_t *msp, uint64_t weight) range_tree_space(msp->ms_allocatable) == 0); ASSERT0(weight & METASLAB_ACTIVE_MASK); + ASSERT(msp->ms_activation_weight != 0); msp->ms_activation_weight = 0; metaslab_passivate_allocator(msp->ms_group, msp, weight); - ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); + ASSERT0(msp->ms_weight & METASLAB_ACTIVE_MASK); } /* @@ -2742,12 +2832,19 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) /* * Normally, we don't want to process a metaslab if there are no * allocations or frees to perform. However, if the metaslab is being - * forced to condense and it's loaded, we need to let it through. + * forced to condense, it's loaded and we're not beyond the final + * dirty txg, we need to let it through. Not condensing beyond the + * final dirty txg prevents an issue where metaslabs that need to be + * condensed but were loaded for other reasons could cause a panic + * here. By only checking the txg in that branch of the conditional, + * we preserve the utility of the VERIFY statements in all other + * cases. */ if (range_tree_is_empty(alloctree) && range_tree_is_empty(msp->ms_freeing) && range_tree_is_empty(msp->ms_checkpointing) && - !(msp->ms_loaded && msp->ms_condense_wanted)) + !(msp->ms_loaded && msp->ms_condense_wanted && + txg <= spa_final_dirty_txg(spa))) return; @@ -2934,6 +3031,30 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) dmu_tx_commit(tx); } +void +metaslab_potentially_unload(metaslab_t *msp, uint64_t txg) +{ + /* + * If the metaslab is loaded and we've not tried to load or allocate + * from it in 'metaslab_unload_delay' txgs, then unload it. 
+ */
+	if (msp->ms_loaded &&
+	    msp->ms_disabled == 0 &&
+	    msp->ms_selected_txg + metaslab_unload_delay < txg) {
+		for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+			VERIFY0(range_tree_space(
+			    msp->ms_allocating[(txg + t) & TXG_MASK]));
+		}
+		if (msp->ms_allocator != -1) {
+			metaslab_passivate(msp, msp->ms_weight &
+			    ~METASLAB_ACTIVE_MASK);
+		}
+
+		if (!metaslab_debug_unload)
+			metaslab_unload(msp);
+	}
+}
+
 /*
  * Called after a transaction group has completely synced to mark
  * all of the metaslab's free space as usable.
@@ -3071,27 +3192,6 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
 	 */
 	metaslab_recalculate_weight_and_sort(msp);
 
-	/*
-	 * If the metaslab is loaded and we've not tried to load or allocate
-	 * from it in 'metaslab_unload_delay' txgs, then unload it.
-	 */
-	if (msp->ms_loaded &&
-	    msp->ms_disabled == 0 &&
-	    msp->ms_selected_txg + metaslab_unload_delay < txg) {
-
-		for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
-			VERIFY0(range_tree_space(
-			    msp->ms_allocating[(txg + t) & TXG_MASK]));
-		}
-		if (msp->ms_allocator != -1) {
-			metaslab_passivate(msp, msp->ms_weight &
-			    ~METASLAB_ACTIVE_MASK);
-		}
-
-		if (!metaslab_debug_unload)
-			metaslab_unload(msp);
-	}
-
 	ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
 	ASSERT0(range_tree_space(msp->ms_freeing));
 	ASSERT0(range_tree_space(msp->ms_freed));
@@ -3456,6 +3556,41 @@ find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight,
 	return (msp);
 }
 
+void
+metaslab_active_mask_verify(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
+		return;
+
+	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0)
+		return;
+
+	if (msp->ms_weight & METASLAB_WEIGHT_PRIMARY) {
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM);
+		VERIFY3S(msp->ms_allocator, !=, -1);
+		VERIFY(msp->ms_primary);
+		return;
+	}
+
+	if (msp->ms_weight & METASLAB_WEIGHT_SECONDARY) {
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM);
+		VERIFY3S(msp->ms_allocator, !=, -1);
+		VERIFY(!msp->ms_primary);
+		return;
+	}
+
+	if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) {
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
+		VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
+		VERIFY3S(msp->ms_allocator, ==, -1);
+		return;
+	}
+}
+
 /* ARGSUSED */
 static uint64_t
 metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
@@ -3464,9 +3599,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 {
 	metaslab_t *msp = NULL;
 	uint64_t offset = -1ULL;
-	uint64_t activation_weight;
 
-	activation_weight = METASLAB_WEIGHT_PRIMARY;
+	uint64_t activation_weight = METASLAB_WEIGHT_PRIMARY;
 	for (int i = 0; i < d; i++) {
 		if (activation_weight == METASLAB_WEIGHT_PRIMARY &&
 		    DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
@@ -3507,10 +3641,30 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 		if (activation_weight == METASLAB_WEIGHT_PRIMARY &&
 		    mg->mg_primaries[allocator] != NULL) {
 			msp = mg->mg_primaries[allocator];
+
+			/*
+			 * Even though we don't hold the ms_lock for the
+			 * primary metaslab, those fields should not
+			 * change while we hold the mg_lock. Thus it is
+			 * safe to make assertions on them.
+			 */
+			ASSERT(msp->ms_primary);
+			ASSERT3S(msp->ms_allocator, ==, allocator);
+			ASSERT(msp->ms_loaded);
+
 			was_active = B_TRUE;
 		} else if (activation_weight == METASLAB_WEIGHT_SECONDARY &&
 		    mg->mg_secondaries[allocator] != NULL) {
 			msp = mg->mg_secondaries[allocator];
+
+			/*
+			 * See comment above about the similar assertions
+			 * for the primary metaslab.
+			 */
+			ASSERT(!msp->ms_primary);
+			ASSERT3S(msp->ms_allocator, ==, allocator);
+			ASSERT(msp->ms_loaded);
+
 			was_active = B_TRUE;
 		} else {
 			msp = find_valid_metaslab(mg, activation_weight, dva, d,
@@ -3523,8 +3677,20 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 			kmem_free(search, sizeof (*search));
 			return (-1ULL);
 		}
-
 		mutex_enter(&msp->ms_lock);
+
+		metaslab_active_mask_verify(msp);
+
+		/*
+		 * This code is disabled because of issues with
+		 * tracepoints in non-gpl kernel modules.
+		 */
+#if 0
+		DTRACE_PROBE3(ms__activation__attempt,
+		    metaslab_t *, msp, uint64_t, activation_weight,
+		    boolean_t, was_active);
+#endif
+
 		/*
 		 * Ensure that the metaslab we have selected is still
 		 * capable of handling our request. It's possible that
 		 * since we received
@@ -3534,44 +3700,80 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 		 * a new metaslab.
 		 */
 		if (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK)) {
+			ASSERT3S(msp->ms_allocator, ==, -1);
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
 		/*
-		 * If the metaslab is freshly activated for an allocator that
-		 * isn't the one we're allocating from, or if it's a primary and
-		 * we're seeking a secondary (or vice versa), we go back and
-		 * select a new metaslab.
+		 * If the metaslab was activated for another allocator
+		 * while we were waiting on the ms_lock above, or it's
+		 * a primary and we're seeking a secondary (or vice versa),
+		 * we go back and select a new metaslab.
 		 */
 		if (!was_active && (msp->ms_weight & METASLAB_ACTIVE_MASK) &&
 		    (msp->ms_allocator != -1) &&
 		    (msp->ms_allocator != allocator || ((activation_weight ==
 		    METASLAB_WEIGHT_PRIMARY) != msp->ms_primary))) {
+			ASSERT(msp->ms_loaded);
+			ASSERT((msp->ms_weight & METASLAB_WEIGHT_CLAIM) ||
+			    msp->ms_allocator != -1);
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
+		/*
+		 * This metaslab was used for claiming regions allocated
+		 * by the ZIL during pool import. Once these regions are
+		 * claimed we don't need to keep the CLAIM bit set
+		 * anymore. Passivate this metaslab to zero its activation
+		 * mask.
+		 */
 		if (msp->ms_weight & METASLAB_WEIGHT_CLAIM &&
 		    activation_weight != METASLAB_WEIGHT_CLAIM) {
+			ASSERT(msp->ms_loaded);
+			ASSERT3S(msp->ms_allocator, ==, -1);
 			metaslab_passivate(msp, msp->ms_weight &
 			    ~METASLAB_WEIGHT_CLAIM);
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
-		if (metaslab_activate(msp, allocator, activation_weight) != 0) {
+		msp->ms_selected_txg = txg;
+
+		int activation_error =
+		    metaslab_activate(msp, allocator, activation_weight);
+		metaslab_active_mask_verify(msp);
+
+		/*
+		 * If the metaslab was activated by another thread for
+		 * another allocator or activation_weight (EBUSY), or it
+		 * failed because another metaslab was assigned as primary
+		 * for this allocator (EEXIST), we continue using this
+		 * metaslab for our allocation, rather than going on to a
+		 * worse metaslab (we waited for that metaslab to be loaded
+		 * after all).
+		 *
+		 * If the activation failed due to an I/O error or ENOSPC we
+		 * skip to the next metaslab.
+		 */
+		boolean_t activated;
+		if (activation_error == 0) {
+			activated = B_TRUE;
+		} else if (activation_error == EBUSY ||
+		    activation_error == EEXIST) {
+			activated = B_FALSE;
+		} else {
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
-
-		msp->ms_selected_txg = txg;
+		ASSERT(msp->ms_loaded);
 
 		/*
 		 * Now that we have the lock, recheck to see if we should
 		 * continue to use this metaslab for this allocation. The
-		 * the metaslab is now loaded so metaslab_should_allocate() can
-		 * accurately determine if the allocation attempt should
+		 * metaslab is now loaded so metaslab_should_allocate()
+		 * can accurately determine if the allocation attempt should
 		 * proceed.
 		 */
 		if (!metaslab_should_allocate(msp, asize)) {
@@ -3581,10 +3783,9 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 			goto next;
 		}
 
-
 		/*
-		 * If this metaslab is currently condensing then pick again as
-		 * we can't manipulate this metaslab until it's committed
+		 * If this metaslab is currently condensing then pick again
+		 * as we can't manipulate this metaslab until it's committed
 		 * to disk. If this metaslab is being initialized, we shouldn't
 		 * allocate from it since the allocated region might be
 		 * overwritten after allocation.
@@ -3592,15 +3793,19 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 		if (msp->ms_condensing) {
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_CONDENSING, allocator);
-			metaslab_passivate(msp, msp->ms_weight &
-			    ~METASLAB_ACTIVE_MASK);
+			if (activated) {
+				metaslab_passivate(msp, msp->ms_weight &
+				    ~METASLAB_ACTIVE_MASK);
+			}
 			mutex_exit(&msp->ms_lock);
 			continue;
 		} else if (msp->ms_disabled > 0) {
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_DISABLED, allocator);
-			metaslab_passivate(msp, msp->ms_weight &
-			    ~METASLAB_ACTIVE_MASK);
+			if (activated) {
+				metaslab_passivate(msp, msp->ms_weight &
+				    ~METASLAB_ACTIVE_MASK);
+			}
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
@@ -3610,12 +3815,22 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 
 		if (offset != -1ULL) {
 			/* Proactively passivate the metaslab, if needed */
-			metaslab_segment_may_passivate(msp);
+			if (activated)
+				metaslab_segment_may_passivate(msp);
 			break;
 		}
next:
 		ASSERT(msp->ms_loaded);
 
+		/*
+		 * This code is disabled because of issues with
+		 * tracepoints in non-gpl kernel modules.
+		 */
+#if 0
+		DTRACE_PROBE2(ms__alloc__failure, metaslab_t *, msp,
+		    uint64_t, asize);
+#endif
+
 		/*
 		 * We were unable to allocate from this metaslab so determine
 		 * a new weight for this metaslab. Now that we have loaded
@@ -3637,14 +3852,33 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
 		 * currently available for allocation and is accurate
 		 * even within a sync pass.
 		 */
+		uint64_t weight;
 		if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
-			uint64_t weight = metaslab_block_maxsize(msp);
+			weight = metaslab_block_maxsize(msp);
 			WEIGHT_SET_SPACEBASED(weight);
+		} else {
+			weight = metaslab_weight_from_range_tree(msp);
+		}
+
+		if (activated) {
 			metaslab_passivate(msp, weight);
 		} else {
-			metaslab_passivate(msp,
-			    metaslab_weight_from_range_tree(msp));
+			/*
+			 * For the case where we use the metaslab that is
+			 * active for another allocator we want to make
+			 * sure that we retain the activation mask.
+			 *
+			 * Note that we could attempt to use something like
+			 * metaslab_recalculate_weight_and_sort() that
+			 * retains the activation mask here. That function
+			 * uses metaslab_weight() to set the weight, though,
+			 * which is not as accurate as the calculations
+			 * above.
+ */ + weight |= msp->ms_weight & METASLAB_ACTIVE_MASK; + metaslab_group_sort(mg, msp, weight); } + metaslab_active_mask_verify(msp); /* * We have just failed an allocation attempt, check @@ -4805,6 +5039,14 @@ MODULE_PARM_DESC(zfs_metaslab_switch_threshold, module_param(metaslab_force_ganging, ulong, 0644); MODULE_PARM_DESC(metaslab_force_ganging, "blocks larger than this size are forced to be gang blocks"); + +module_param(metaslab_df_max_search, int, 0644); +MODULE_PARM_DESC(metaslab_df_max_search, + "max distance (bytes) to search forward before using size tree"); + +module_param(metaslab_df_use_largest_segment, int, 0644); +MODULE_PARM_DESC(metaslab_df_use_largest_segment, + "when looking in size tree, use largest segment instead of exact fit"); /* END CSTYLED */ #endif diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index cd5603a1a5cd..810d20fdd95c 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -87,12 +87,12 @@ * * In this case, a weak guarantee is provided. Since the host which last had * the pool imported will suspend the pool if no mmp writes land within - * fail_intervals * multihost_interval ms, the absense of writes during that + * fail_intervals * multihost_interval ms, the absence of writes during that * time means either the pool is not imported, or it is imported but the pool * is suspended and no further writes will occur. * * Note that resuming the suspended pool on the remote host would invalidate - * this gurantee, and so it is not allowed. + * this guarantee, and so it is not allowed. * * The factor of 2 provides a conservative safety factor and derives from * MMP_IMPORT_SAFETY_FACTOR; @@ -672,7 +672,7 @@ mmp_thread(void *arg) CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv, - &mmp->mmp_thread_lock, next_time, USEC2NSEC(1), + &mmp->mmp_thread_lock, next_time, USEC2NSEC(100), CALLOUT_FLAG_ABSOLUTE); CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock); } diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index 2a594c56cbd5..b74ee0f0670a 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -363,6 +363,28 @@ multilist_sublist_remove(multilist_sublist_t *mls, void *obj) list_remove(&mls->mls_list, obj); } +int +multilist_sublist_is_empty(multilist_sublist_t *mls) +{ + ASSERT(MUTEX_HELD(&mls->mls_lock)); + return (list_is_empty(&mls->mls_list)); +} + +int +multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) +{ + multilist_sublist_t *mls; + int empty; + + ASSERT3U(sublist_idx, <, ml->ml_num_sublists); + mls = &ml->ml_sublists[sublist_idx]; + ASSERT(!MUTEX_HELD(&mls->mls_lock)); + mutex_enter(&mls->mls_lock); + empty = list_is_empty(&mls->mls_list); + mutex_exit(&mls->mls_lock); + return (empty); +} + void * multilist_sublist_head(multilist_sublist_t *mls) { diff --git a/module/zfs/pathname.c b/module/zfs/pathname.c index e3e97c9bb365..4766762f37d1 100644 --- a/module/zfs/pathname.c +++ b/module/zfs/pathname.c @@ -71,9 +71,12 @@ pn_alloc(struct pathname *pnp) void pn_alloc_sz(struct pathname *pnp, size_t sz) { - pnp->pn_path = pnp->pn_buf = kmem_alloc(sz, KM_SLEEP); - pnp->pn_pathlen = 0; + pnp->pn_buf = kmem_alloc(sz, KM_SLEEP); pnp->pn_bufsize = sz; +#if 0 /* unused in ZoL */ + pnp->pn_path = pnp->pn_buf; + pnp->pn_pathlen = 0; +#endif } /* @@ -84,6 +87,10 @@ pn_free(struct pathname *pnp) { /* pn_bufsize is usually MAXPATHLEN, but may not be */ kmem_free(pnp->pn_buf, pnp->pn_bufsize); - pnp->pn_path = pnp->pn_buf = NULL; - pnp->pn_pathlen = pnp->pn_bufsize = 0; + pnp->pn_buf = NULL; + 
pnp->pn_bufsize = 0; +#if 0 /* unused in ZoL */ + pnp->pn_path = NULL; + pnp->pn_pathlen = 0; +#endif } diff --git a/module/zfs/policy.c b/module/zfs/policy.c index 55c932747915..7f9456a670eb 100644 --- a/module/zfs/policy.c +++ b/module/zfs/policy.c @@ -70,7 +70,7 @@ static int priv_policy_user(const cred_t *cr, int capability, boolean_t all, int err) { /* - * All priv_policy_user checks are preceeded by kuid/kgid_has_mapping() + * All priv_policy_user checks are preceded by kuid/kgid_has_mapping() * checks. If we cannot do them, we shouldn't be using ns_capable() * since we don't know whether the affected files are valid in our * namespace. Note that kuid_has_mapping() came after cred->user_ns, so @@ -209,7 +209,7 @@ secpolicy_vnode_setdac(const cred_t *cr, uid_t owner) int secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot) { - return (0); + return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); } /* diff --git a/module/zfs/qat.h b/module/zfs/qat.h index 9014c03148ba..9ae8eb173572 100644 --- a/module/zfs/qat.h +++ b/module/zfs/qat.h @@ -40,11 +40,6 @@ typedef enum qat_encrypt_dir { #include "dc/cpa_dc.h" #include "lac/cpa_cy_sym.h" -/* - * Timeout - no response from hardware after 0.5 seconds - */ -#define QAT_TIMEOUT_MS 500 - /* * The minimal and maximal buffer size which are not restricted * in the QAT hardware, but with the input buffer size between 4KB @@ -85,7 +80,7 @@ typedef struct qat_stats { * Number of fails in the QAT compression / decompression engine. * Note: when a QAT error happens, it doesn't necessarily indicate a * critical hardware issue. Sometimes it is because the output buffer - * is not big enough. The compression job will be transfered to the + * is not big enough. The compression job will be transferred to the * gzip software implementation so the functionality of ZFS is not * impacted. */ @@ -118,7 +113,7 @@ typedef struct qat_stats { /* * Number of fails in the QAT encryption / decryption engine. * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. The encryption job will be transfered + * critical hardware issue. The encryption job will be transferred * to the software implementation so the functionality of ZFS is * not impacted. */ @@ -135,7 +130,7 @@ typedef struct qat_stats { /* * Number of fails in the QAT checksum engine. * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. The checksum job will be transfered to the + * critical hardware issue. The checksum job will be transferred to the * software implementation so the functionality of ZFS is not impacted. 
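The comments above describe the same fallback contract for all three QAT engines. A minimal sketch of that pattern, with hypothetical hw_*/sw_* helpers standing in for the real entry points, looks like this:

#include <stddef.h>

/* Hypothetical helpers standing in for the accelerator and software paths. */
extern int hw_available(int src_len);
extern int hw_compress(const char *src, int src_len,
    char *dst, int dst_len, size_t *out_len);
extern int sw_compress(const char *src, int src_len,
    char *dst, int dst_len, size_t *out_len);

/*
 * Try the accelerator first; any hardware failure (bad status, output
 * buffer too small, device gone) falls through to the software path, so
 * a QAT error costs performance but never correctness.
 */
static int
compress_with_fallback(const char *src, int src_len,
    char *dst, int dst_len, size_t *out_len)
{
	if (hw_available(src_len) &&
	    hw_compress(src, src_len, dst, dst_len, out_len) == 0)
		return (0);

	return (sw_compress(src, src_len, dst, dst_len, out_len));
}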
*/ kstat_named_t cksum_fails; diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index 1c5c0a4e7256..16649d60f668 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -249,7 +249,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, Cpa8U *buffer_meta_dst = NULL; Cpa32U buffer_meta_size = 0; CpaDcRqResults dc_results; - CpaStatus status = CPA_STATUS_SUCCESS; + CpaStatus status = CPA_STATUS_FAIL; Cpa32U hdr_sz = 0; Cpa32U compressed_sz; Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2; @@ -278,16 +278,19 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) + ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer)); - if (QAT_PHYS_CONTIG_ALLOC(&in_pages, - num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&in_pages, + num_src_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) goto fail; - if (QAT_PHYS_CONTIG_ALLOC(&out_pages, - num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&out_pages, + num_dst_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) goto fail; - if (QAT_PHYS_CONTIG_ALLOC(&add_pages, - num_add_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&add_pages, + num_add_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) goto fail; i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; @@ -296,19 +299,19 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf, &buffer_meta_size); - if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size); + if (status != CPA_STATUS_SUCCESS) goto fail; cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf, &buffer_meta_size); - if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size); + if (status != CPA_STATUS_SUCCESS) goto fail; /* build source buffer list */ - if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size); + if (status != CPA_STATUS_SUCCESS) goto fail; flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1); @@ -316,8 +319,8 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, buf_list_src->pBuffers = flat_buf_src; /* always point to first one */ /* build destination buffer list */ - if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) != - CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size); + if (status != CPA_STATUS_SUCCESS) goto fail; flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); @@ -404,11 +407,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, } /* we now wait until the completion of the operation. */ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + wait_for_completion(&complete); if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; @@ -463,11 +462,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, } /* we now wait until the completion of the operation. 
*/ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + wait_for_completion(&complete); if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; @@ -547,7 +542,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, } static int -param_set_qat_compress(const char *val, struct kernel_param *kp) +param_set_qat_compress(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c index 34c19b5823a8..ec9f085cffaf 100644 --- a/module/zfs/qat_crypt.c +++ b/module/zfs/qat_crypt.c @@ -415,6 +415,9 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, op_data.messageLenToCipherInBytes = enc_len; op_data.ivLenInBytes = ZIO_DATA_IV_LEN; bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN); + /* if dir is QAT_DECRYPT, copy digest_buf to pDigestResult */ + if (dir == QAT_DECRYPT) + bcopy(digest_buf, op_data.pDigestResult, ZIO_DATA_MAC_LEN); cb.verify_result = CPA_FALSE; init_completion(&cb.complete); @@ -423,23 +426,21 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, if (status != CPA_STATUS_SUCCESS) goto fail; - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + /* we now wait until the completion of the operation. */ + wait_for_completion(&cb.complete); if (cb.verify_result == CPA_FALSE) { status = CPA_STATUS_FAIL; goto fail; } - /* save digest result to digest_buf */ - bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); - if (dir == QAT_ENCRYPT) + if (dir == QAT_ENCRYPT) { + /* if dir is QAT_ENCRYPT, save pDigestResult to digest_buf */ + bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); QAT_STAT_INCR(encrypt_total_out_bytes, enc_len); - else + } else { QAT_STAT_INCR(decrypt_total_out_bytes, enc_len); + } fail: if (status != CPA_STATUS_SUCCESS) @@ -549,11 +550,9 @@ qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) if (status != CPA_STATUS_SUCCESS) goto fail; - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + /* we now wait until the completion of the operation. 
*/ + wait_for_completion(&cb.complete); + if (cb.verify_result == CPA_FALSE) { status = CPA_STATUS_FAIL; goto fail; @@ -578,7 +577,7 @@ qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) } static int -param_set_qat_encrypt(const char *val, struct kernel_param *kp) +param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; @@ -600,7 +599,7 @@ param_set_qat_encrypt(const char *val, struct kernel_param *kp) } static int -param_set_qat_checksum(const char *val, struct kernel_param *kp) +param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index bcaa6d387539..a7e46d3790a2 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -121,7 +121,7 @@ zfs_refcount_count(zfs_refcount_t *rc) } int64_t -zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { reference_t *ref = NULL; int64_t count; @@ -143,13 +143,14 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_add(zfs_refcount_t *rc, void *holder) +zfs_refcount_add(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_add_many(rc, 1, holder)); } int64_t -zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, + const void *holder) { reference_t *ref; int64_t count; @@ -197,7 +198,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_remove(zfs_refcount_t *rc, void *holder) +zfs_refcount_remove(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_remove_many(rc, 1, holder)); } @@ -235,7 +236,7 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) void zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, - void *current_holder, void *new_holder) + const void *current_holder, const void *new_holder) { reference_t *ref; boolean_t found = B_FALSE; @@ -260,8 +261,8 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, } void -zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, - void *new_holder) +zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder, + const void *new_holder) { return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder, new_holder)); @@ -273,7 +274,7 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, * might be held. */ boolean_t -zfs_refcount_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; @@ -301,7 +302,7 @@ zfs_refcount_held(zfs_refcount_t *rc, void *holder) * since the reference might not be held. */ boolean_t -zfs_refcount_not_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 56a606962a7f..621838396a45 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -83,7 +83,7 @@ * Layouts are simply an array of the attributes and their * ordering i.e. [0, 1, 4, 5, 2] * - * Each distinct layout is given a unique layout number and that is whats + * Each distinct layout is given a unique layout number and that is what's * stored in the header at the beginning of the SA data buffer. * * A layout only covers a single dbuf (bonus or spill). 
If a set of @@ -95,7 +95,7 @@ * Adding a single attribute will cause the entire set of attributes to * be rewritten and could result in a new layout number being constructed * as part of the rewrite if no such layout exists for the new set of - * attribues. The new attribute will be appended to the end of the already + * attributes. The new attribute will be appended to the end of the already * existing attributes. * * Both the attribute registration and attribute layout information are @@ -1014,7 +1014,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, } sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); - mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&sa->sa_lock, NULL, MUTEX_NOLOCKDEP, NULL); sa->sa_master_obj = sa_obj; os->os_sa = sa; @@ -1380,7 +1380,7 @@ sa_handle_destroy(sa_handle_t *hdl) dmu_buf_rele(hdl->sa_bonus, NULL); if (hdl->sa_spill) - dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + dmu_buf_rele(hdl->sa_spill, NULL); mutex_exit(&hdl->sa_lock); kmem_cache_free(sa_cache, hdl); @@ -2028,7 +2028,7 @@ sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, hdl->sa_spill_tab = NULL; } - dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + dmu_buf_rele(hdl->sa_spill, NULL); hdl->sa_spill = NULL; } @@ -2131,13 +2131,13 @@ sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) void sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) { - dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); + dmu_object_info_from_db(hdl->sa_bonus, doi); } void sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) { - dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, + dmu_object_size_from_db(hdl->sa_bonus, blksize, nblocks); } @@ -2150,7 +2150,7 @@ sa_set_userp(sa_handle_t *hdl, void *ptr) dmu_buf_t * sa_get_db(sa_handle_t *hdl) { - return ((dmu_buf_t *)hdl->sa_bonus); + return (hdl->sa_bonus); } void * diff --git a/module/zfs/spa.c b/module/zfs/spa.c index bb4de3667b5f..c54989c9b05d 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1,46 +1,46 @@ /* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ +* CDDL HEADER START +* +* The contents of this file are subject to the terms of the +* Common Development and Distribution License (the "License"). +* You may not use this file except in compliance with the License. +* +* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +* or http://www.opensolaris.org/os/licensing. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* When distributing Covered Code, include this CDDL HEADER in each +* file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+* If applicable, add the following below this CDDL HEADER, with the +* fields enclosed by brackets "[]" replaced with your own identifying +* information: Portions Copyright [yyyy] [name of copyright owner] +* +* CDDL HEADER END +*/ /* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2019 by Delphix. All rights reserved. - * Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. - * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2016 Toomas Soome - * Copyright (c) 2016 Actifio, Inc. All rights reserved. - * Copyright 2018 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. - * Copyright 2017 Joyent, Inc. - * Copyright (c) 2017, Intel Corporation. - */ +* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +* Copyright (c) 2011, 2019 by Delphix. All rights reserved. +* Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. +* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. +* Copyright 2013 Saso Kiselkov. All rights reserved. +* Copyright (c) 2014 Integros [integros.com] +* Copyright 2016 Toomas Soome +* Copyright (c) 2016 Actifio, Inc. All rights reserved. +* Copyright 2018 Joyent, Inc. +* Copyright (c) 2017, 2019, Datto Inc. All rights reserved. +* Copyright 2017 Joyent, Inc. +* Copyright (c) 2017, Intel Corporation. +*/ /* - * SPA: Storage Pool Allocator - * - * This file contains all the routines used when modifying on-disk SPA state. - * This includes opening, importing, destroying, exporting a pool, and syncing a - * pool. - */ +* SPA: Storage Pool Allocator +* +* This file contains all the routines used when modifying on-disk SPA state. +* This includes opening, importing, destroying, exporting a pool, and syncing a +* pool. +*/ #include #include @@ -91,22 +91,23 @@ #include #include #include +#include #endif /* _KERNEL */ #include "zfs_prop.h" #include "zfs_comutil.h" /* - * The interval, in seconds, at which failed configuration cache file writes - * should be retried. - */ +* The interval, in seconds, at which failed configuration cache file writes +* should be retried. +*/ int zfs_ccw_retry_interval = 300; typedef enum zti_modes { - ZTI_MODE_FIXED, /* value is # of threads (min 1) */ - ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ - ZTI_MODE_NULL, /* don't create a taskq */ - ZTI_NMODES +ZTI_MODE_FIXED, /* value is # of threads (min 1) */ +ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ +ZTI_MODE_NULL, /* don't create a taskq */ +ZTI_NMODES } zti_modes_t; #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } @@ -118,41 +119,41 @@ typedef enum zti_modes { #define ZTI_ONE ZTI_N(1) typedef struct zio_taskq_info { - zti_modes_t zti_mode; - uint_t zti_value; - uint_t zti_count; +zti_modes_t zti_mode; +uint_t zti_value; +uint_t zti_count; } zio_taskq_info_t; static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { - "iss", "iss_h", "int", "int_h" +"iss", "iss_h", "int", "int_h" }; /* - * This table defines the taskq settings for each ZFS I/O type. When - * initializing a pool, we use this table to create an appropriately sized - * taskq. Some operations are low volume and therefore have a small, static - * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE - * macros. Other operations process a large amount of data; the ZTI_BATCH - * macro causes us to create a taskq oriented for throughput. 
Some operations - * are so high frequency and short-lived that the taskq itself can become a - * point of lock contention. The ZTI_P(#, #) macro indicates that we need an - * additional degree of parallelism specified by the number of threads per- - * taskq and the number of taskqs; when dispatching an event in this case, the - * particular taskq is chosen at random. - * - * The different taskq priorities are to handle the different contexts (issue - * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that - * need to be handled with minimum delay. - */ +* This table defines the taskq settings for each ZFS I/O type. When +* initializing a pool, we use this table to create an appropriately sized +* taskq. Some operations are low volume and therefore have a small, static +* number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE +* macros. Other operations process a large amount of data; the ZTI_BATCH +* macro causes us to create a taskq oriented for throughput. Some operations +* are so high frequency and short-lived that the taskq itself can become a +* point of lock contention. The ZTI_P(#, #) macro indicates that we need an +* additional degree of parallelism specified by the number of threads per- +* taskq and the number of taskqs; when dispatching an event in this case, the +* particular taskq is chosen at random. +* +* The different taskq priorities are to handle the different contexts (issue +* and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that +* need to be handled with minimum delay. +*/ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { - /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ - { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ - { ZTI_BATCH, ZTI_N(5), ZTI_P(12, 8), ZTI_N(5) }, /* WRITE */ - { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ - { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ - { ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ +/* ISSUE ISSUE_HIGH INTR INTR_HIGH */ +{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ +{ ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ +{ ZTI_BATCH, ZTI_N(5), ZTI_P(12, 8), ZTI_N(5) }, /* WRITE */ +{ ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ +{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ +{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ +{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ }; static void spa_sync_version(void *arg, dmu_tx_t *tx); @@ -168,6084 +169,6119 @@ uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ /* - * Report any spa_load_verify errors found, but do not fail spa_load. - * This is used by zdb to analyze non-idle pools. - */ +* Report any spa_load_verify errors found, but do not fail spa_load. +* This is used by zdb to analyze non-idle pools. +*/ boolean_t spa_load_verify_dryrun = B_FALSE; /* - * This (illegal) pool name is used when temporarily importing a spa_t in order - * to get the vdev stats associated with the imported devices. - */ +* This (illegal) pool name is used when temporarily importing a spa_t in order +* to get the vdev stats associated with the imported devices. +*/ #define TRYIMPORT_NAME "$import" /* - * For debugging purposes: print out vdev tree during pool import. - */ +* For debugging purposes: print out vdev tree during pool import. 
+*/ int spa_load_print_vdev_tree = B_FALSE; /* - * A non-zero value for zfs_max_missing_tvds means that we allow importing - * pools with missing top-level vdevs. This is strictly intended for advanced - * pool recovery cases since missing data is almost inevitable. Pools with - * missing devices can only be imported read-only for safety reasons, and their - * fail-mode will be automatically set to "continue". - * - * With 1 missing vdev we should be able to import the pool and mount all - * datasets. User data that was not modified after the missing device has been - * added should be recoverable. This means that snapshots created prior to the - * addition of that device should be completely intact. - * - * With 2 missing vdevs, some datasets may fail to mount since there are - * dataset statistics that are stored as regular metadata. Some data might be - * recoverable if those vdevs were added recently. - * - * With 3 or more missing vdevs, the pool is severely damaged and MOS entries - * may be missing entirely. Chances of data recovery are very low. Note that - * there are also risks of performing an inadvertent rewind as we might be - * missing all the vdevs with the latest uberblocks. - */ +* A non-zero value for zfs_max_missing_tvds means that we allow importing +* pools with missing top-level vdevs. This is strictly intended for advanced +* pool recovery cases since missing data is almost inevitable. Pools with +* missing devices can only be imported read-only for safety reasons, and their +* fail-mode will be automatically set to "continue". +* +* With 1 missing vdev we should be able to import the pool and mount all +* datasets. User data that was not modified after the missing device has been +* added should be recoverable. This means that snapshots created prior to the +* addition of that device should be completely intact. +* +* With 2 missing vdevs, some datasets may fail to mount since there are +* dataset statistics that are stored as regular metadata. Some data might be +* recoverable if those vdevs were added recently. +* +* With 3 or more missing vdevs, the pool is severely damaged and MOS entries +* may be missing entirely. Chances of data recovery are very low. Note that +* there are also risks of performing an inadvertent rewind as we might be +* missing all the vdevs with the latest uberblocks. +*/ unsigned long zfs_max_missing_tvds = 0; /* - * The parameters below are similar to zfs_max_missing_tvds but are only - * intended for a preliminary open of the pool with an untrusted config which - * might be incomplete or out-dated. - * - * We are more tolerant for pools opened from a cachefile since we could have - * an out-dated cachefile where a device removal was not registered. - * We could have set the limit arbitrarily high but in the case where devices - * are really missing we would want to return the proper error codes; we chose - * SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available - * and we get a chance to retrieve the trusted config. - */ +* The parameters below are similar to zfs_max_missing_tvds but are only +* intended for a preliminary open of the pool with an untrusted config which +* might be incomplete or out-dated. +* +* We are more tolerant for pools opened from a cachefile since we could have +* an out-dated cachefile where a device removal was not registered. 
+* We could have set the limit arbitrarily high but in the case where devices +* are really missing we would want to return the proper error codes; we chose +* SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available +* and we get a chance to retrieve the trusted config. +*/ uint64_t zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1; /* - * In the case where config was assembled by scanning device paths (/dev/dsks - * by default) we are less tolerant since all the existing devices should have - * been detected and we want spa_load to return the right error codes. - */ +* In the case where config was assembled by scanning device paths (/dev/dsks +* by default) we are less tolerant since all the existing devices should have +* been detected and we want spa_load to return the right error codes. +*/ uint64_t zfs_max_missing_tvds_scan = 0; /* - * Debugging aid that pauses spa_sync() towards the end. - */ +* Debugging aid that pauses spa_sync() towards the end. +*/ boolean_t zfs_pause_spa_sync = B_FALSE; /* - * ========================================================================== - * SPA properties routines - * ========================================================================== - */ +* ========================================================================== +* SPA properties routines +* ========================================================================== +*/ /* - * Add a (source=src, propname=propval) list to an nvlist. - */ +* Add a (source=src, propname=propval) list to an nvlist. +*/ static void spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, - uint64_t intval, zprop_source_t src) +uint64_t intval, zprop_source_t src) { - const char *propname = zpool_prop_to_name(prop); - nvlist_t *propval; +const char *propname = zpool_prop_to_name(prop); +nvlist_t *propval; - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); +VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); +VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); - if (strval != NULL) - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); - else - VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); +if (strval != NULL) + VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); +else + VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); - VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); - nvlist_free(propval); +VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); +nvlist_free(propval); } /* - * Get property values from the spa configuration. - */ +* Get property values from the spa configuration. 
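For reference, the nested nvlist shape built by spa_prop_add_list() can be reproduced with the userspace libnvpair API (link with -lnvpair). In this sketch the "source" and "value" keys are stand-ins for the ZPROP_SOURCE and ZPROP_VALUE constants, and error checking is abbreviated:

```c
#include <stdio.h>
#include <libnvpair.h>

/* Userspace analog of spa_prop_add_list(): one {source, value} pair. */
static int
prop_add(nvlist_t *nvl, const char *propname, const char *strval,
    uint64_t intval, uint64_t src)
{
	nvlist_t *propval;

	if (nvlist_alloc(&propval, NV_UNIQUE_NAME, 0) != 0)
		return (-1);
	(void) nvlist_add_uint64(propval, "source", src);
	if (strval != NULL)
		(void) nvlist_add_string(propval, "value", strval);
	else
		(void) nvlist_add_uint64(propval, "value", intval);
	(void) nvlist_add_nvlist(nvl, propname, propval);
	nvlist_free(propval);	/* nvl holds its own copy */
	return (0);
}

int
main(void)
{
	nvlist_t *nvl;

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		return (1);
	(void) prop_add(nvl, "size", NULL, 1ULL << 30, 0);
	(void) prop_add(nvl, "health", "ONLINE", 0, 0);
	nvlist_print(stdout, nvl);	/* dump the nested structure */
	nvlist_free(nvl);
	return (0);
}
```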
+*/ static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { - vdev_t *rvd = spa->spa_root_vdev; - dsl_pool_t *pool = spa->spa_dsl_pool; - uint64_t size, alloc, cap, version; - const zprop_source_t src = ZPROP_SRC_NONE; - spa_config_dirent_t *dp; - metaslab_class_t *mc = spa_normal_class(spa); - - ASSERT(MUTEX_HELD(&spa->spa_props_lock)); - - if (rvd != NULL) { - alloc = metaslab_class_get_alloc(mc); - alloc += metaslab_class_get_alloc(spa_special_class(spa)); - alloc += metaslab_class_get_alloc(spa_dedup_class(spa)); - - size = metaslab_class_get_space(mc); - size += metaslab_class_get_space(spa_special_class(spa)); - size += metaslab_class_get_space(spa_dedup_class(spa)); - - spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, - size - alloc, src); - spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL, - spa->spa_checkpoint_info.sci_dspace, src); - - spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, - metaslab_class_fragmentation(mc), src); - spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, - metaslab_class_expandable_space(mc), src); - spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, - (spa_mode(spa) == FREAD), src); - - cap = (size == 0) ? 0 : (alloc * 100 / size); - spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); - - spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, - ddt_get_pool_dedup_ratio(spa), src); - - spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, - rvd->vdev_state, src); - - version = spa_version(spa); - if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) { - spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, - version, ZPROP_SRC_DEFAULT); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, - version, ZPROP_SRC_LOCAL); - } - spa_prop_add_list(*nvp, ZPOOL_PROP_LOAD_GUID, - NULL, spa_load_guid(spa), src); +vdev_t *rvd = spa->spa_root_vdev; +dsl_pool_t *pool = spa->spa_dsl_pool; +uint64_t size, alloc, cap, version; +const zprop_source_t src = ZPROP_SRC_NONE; +spa_config_dirent_t *dp; +metaslab_class_t *mc = spa_normal_class(spa); + +ASSERT(MUTEX_HELD(&spa->spa_props_lock)); + +if (rvd != NULL) { + alloc = metaslab_class_get_alloc(mc); + alloc += metaslab_class_get_alloc(spa_special_class(spa)); + alloc += metaslab_class_get_alloc(spa_dedup_class(spa)); + + size = metaslab_class_get_space(mc); + size += metaslab_class_get_space(spa_special_class(spa)); + size += metaslab_class_get_space(spa_dedup_class(spa)); + + spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, + size - alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL, + spa->spa_checkpoint_info.sci_dspace, src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, + metaslab_class_fragmentation(mc), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, + metaslab_class_expandable_space(mc), src); + spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, + (spa_mode(spa) == FREAD), src); + + cap = (size == 0) ? 
0 : (alloc * 100 / size); + spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, + ddt_get_pool_dedup_ratio(spa), src); + + spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, + rvd->vdev_state, src); + + version = spa_version(spa); + if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, + version, ZPROP_SRC_DEFAULT); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, + version, ZPROP_SRC_LOCAL); } + spa_prop_add_list(*nvp, ZPOOL_PROP_LOAD_GUID, + NULL, spa_load_guid(spa), src); +} - if (pool != NULL) { - /* - * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, - * when opening pools before this version freedir will be NULL. - */ - if (pool->dp_free_dir != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, - dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, - src); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, - NULL, 0, src); - } +if (pool != NULL) { + /* + * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, + * when opening pools before this version freedir will be NULL. + */ + if (pool->dp_free_dir != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, + dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, + src); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, + NULL, 0, src); + } - if (pool->dp_leak_dir != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, - dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, - src); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, - NULL, 0, src); - } + if (pool->dp_leak_dir != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, + dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, + src); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, + NULL, 0, src); } +} - spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); +spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); - if (spa->spa_comment != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, - 0, ZPROP_SRC_LOCAL); - } +if (spa->spa_comment != NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, + 0, ZPROP_SRC_LOCAL); +} - if (spa->spa_root != NULL) - spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, - 0, ZPROP_SRC_LOCAL); +if (spa->spa_root != NULL) + spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, + 0, ZPROP_SRC_LOCAL); - if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, - MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, - SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); - } +if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, + MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); +} else { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, + SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); +} - if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, - DNODE_MAX_SIZE, ZPROP_SRC_NONE); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, - DNODE_MIN_SIZE, ZPROP_SRC_NONE); - } +if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, + DNODE_MAX_SIZE, ZPROP_SRC_NONE); +} else { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL, + DNODE_MIN_SIZE, ZPROP_SRC_NONE); +} 
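The capacity property computed above is simply allocated space as a whole-number percentage of total space, with a guard for the empty-pool case; a self-contained sketch:

```c
#include <stdio.h>
#include <stdint.h>

/* Capacity in whole percent, guarding division by zero. */
static uint64_t
cap_pct(uint64_t alloc, uint64_t size)
{
	return (size == 0 ? 0 : alloc * 100 / size);
}

int
main(void)
{
	printf("%llu%%\n", (unsigned long long)cap_pct(0, 0));	  /* 0%  */
	printf("%llu%%\n", (unsigned long long)cap_pct(25, 100)); /* 25% */
	return (0);
}
```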
- if ((dp = list_head(&spa->spa_config_list)) != NULL) { - if (dp->scd_path == NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, - "none", 0, ZPROP_SRC_LOCAL); - } else if (strcmp(dp->scd_path, spa_config_path) != 0) { - spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, - dp->scd_path, 0, ZPROP_SRC_LOCAL); - } +if ((dp = list_head(&spa->spa_config_list)) != NULL) { + if (dp->scd_path == NULL) { + spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, + "none", 0, ZPROP_SRC_LOCAL); + } else if (strcmp(dp->scd_path, spa_config_path) != 0) { + spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, + dp->scd_path, 0, ZPROP_SRC_LOCAL); } } +} /* - * Get zpool property values. - */ +* Get zpool property values. +*/ int spa_prop_get(spa_t *spa, nvlist_t **nvp) { - objset_t *mos = spa->spa_meta_objset; - zap_cursor_t zc; - zap_attribute_t za; - int err; - - err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); - if (err) - return (err); - - mutex_enter(&spa->spa_props_lock); +objset_t *mos = spa->spa_meta_objset; +zap_cursor_t zc; +zap_attribute_t za; +int err; - /* - * Get properties from the spa config. - */ - spa_prop_get_config(spa, nvp); - - /* If no pool property object, no more prop to get. */ - if (mos == NULL || spa->spa_pool_props_object == 0) { - mutex_exit(&spa->spa_props_lock); - goto out; - } +err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); +if (err) + return (err); - /* - * Get properties from the MOS pool property object. - */ - for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); - (err = zap_cursor_retrieve(&zc, &za)) == 0; - zap_cursor_advance(&zc)) { - uint64_t intval = 0; - char *strval = NULL; - zprop_source_t src = ZPROP_SRC_DEFAULT; - zpool_prop_t prop; +mutex_enter(&spa->spa_props_lock); - if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL) - continue; +/* + * Get properties from the spa config. + */ +spa_prop_get_config(spa, nvp); - switch (za.za_integer_length) { - case 8: - /* integer property */ - if (za.za_first_integer != - zpool_prop_default_numeric(prop)) - src = ZPROP_SRC_LOCAL; - - if (prop == ZPOOL_PROP_BOOTFS) { - dsl_pool_t *dp; - dsl_dataset_t *ds = NULL; - - dp = spa_get_dsl(spa); - dsl_pool_config_enter(dp, FTAG); - err = dsl_dataset_hold_obj(dp, - za.za_first_integer, FTAG, &ds); - if (err != 0) { - dsl_pool_config_exit(dp, FTAG); - break; - } +/* If no pool property object, no more prop to get. */ +if (mos == NULL || spa->spa_pool_props_object == 0) { + mutex_exit(&spa->spa_props_lock); + goto out; +} - strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, - KM_SLEEP); - dsl_dataset_name(ds, strval); - dsl_dataset_rele(ds, FTAG); +/* + * Get properties from the MOS pool property object. 
+ */ +for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); + (err = zap_cursor_retrieve(&zc, &za)) == 0; + zap_cursor_advance(&zc)) { + uint64_t intval = 0; + char *strval = NULL; + zprop_source_t src = ZPROP_SRC_DEFAULT; + zpool_prop_t prop; + + if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL) + continue; + + switch (za.za_integer_length) { + case 8: + /* integer property */ + if (za.za_first_integer != + zpool_prop_default_numeric(prop)) + src = ZPROP_SRC_LOCAL; + + if (prop == ZPOOL_PROP_BOOTFS) { + dsl_pool_t *dp; + dsl_dataset_t *ds = NULL; + + dp = spa_get_dsl(spa); + dsl_pool_config_enter(dp, FTAG); + err = dsl_dataset_hold_obj(dp, + za.za_first_integer, FTAG, &ds); + if (err != 0) { dsl_pool_config_exit(dp, FTAG); - } else { - strval = NULL; - intval = za.za_first_integer; + break; } - spa_prop_add_list(*nvp, prop, strval, intval, src); + strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, + KM_SLEEP); + dsl_dataset_name(ds, strval); + dsl_dataset_rele(ds, FTAG); + dsl_pool_config_exit(dp, FTAG); + } else { + strval = NULL; + intval = za.za_first_integer; + } - if (strval != NULL) - kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); + spa_prop_add_list(*nvp, prop, strval, intval, src); - break; + if (strval != NULL) + kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); - case 1: - /* string property */ - strval = kmem_alloc(za.za_num_integers, KM_SLEEP); - err = zap_lookup(mos, spa->spa_pool_props_object, - za.za_name, 1, za.za_num_integers, strval); - if (err) { - kmem_free(strval, za.za_num_integers); - break; - } - spa_prop_add_list(*nvp, prop, strval, 0, src); - kmem_free(strval, za.za_num_integers); - break; + break; - default: + case 1: + /* string property */ + strval = kmem_alloc(za.za_num_integers, KM_SLEEP); + err = zap_lookup(mos, spa->spa_pool_props_object, + za.za_name, 1, za.za_num_integers, strval); + if (err) { + kmem_free(strval, za.za_num_integers); break; } + spa_prop_add_list(*nvp, prop, strval, 0, src); + kmem_free(strval, za.za_num_integers); + break; + + default: + break; } - zap_cursor_fini(&zc); - mutex_exit(&spa->spa_props_lock); +} +zap_cursor_fini(&zc); +mutex_exit(&spa->spa_props_lock); out: - if (err && err != ENOENT) { - nvlist_free(*nvp); - *nvp = NULL; - return (err); - } +if (err && err != ENOENT) { + nvlist_free(*nvp); + *nvp = NULL; + return (err); +} - return (0); +return (0); } /* - * Validate the given pool properties nvlist and modify the list - * for the property values to be set. - */ +* Validate the given pool properties nvlist and modify the list +* for the property values to be set. +*/ static int spa_prop_validate(spa_t *spa, nvlist_t *props) { - nvpair_t *elem; - int error = 0, reset_bootfs = 0; - uint64_t objnum = 0; - boolean_t has_feature = B_FALSE; - - elem = NULL; - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { - uint64_t intval; - char *strval, *slash, *check, *fname; - const char *propname = nvpair_name(elem); - zpool_prop_t prop = zpool_name_to_prop(propname); - - switch (prop) { - case ZPOOL_PROP_INVAL: - if (!zpool_prop_feature(propname)) { - error = SET_ERROR(EINVAL); - break; - } - - /* - * Sanitize the input. 
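spa_prop_get() funnels every failure after the initial allocation through a single out: label that releases the result. A userspace sketch of the same allocate-then-unwind shape, with a hypothetical fill() standing in for the fallible ZAP lookups:

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Pretend fallible step; fails on request for demonstration. */
static int
fill(char *buf, size_t len, int fail)
{
	if (fail)
		return (EIO);
	(void) snprintf(buf, len, "capacity=25");
	return (0);
}

/*
 * Allocate the result up front, send every later failure through one
 * cleanup label, and hand the buffer back only on success -- the same
 * shape as spa_prop_get()'s "out:" path.
 */
static int
get_props(char **out, int fail)
{
	char *buf = malloc(64);
	int err;

	if (buf == NULL)
		return (ENOMEM);

	err = fill(buf, 64, fail);
	if (err != 0)
		goto out;

	*out = buf;
	return (0);
out:
	free(buf);
	*out = NULL;
	return (err);
}

int
main(void)
{
	char *p;

	if (get_props(&p, 0) == 0) {
		puts(p);
		free(p);
	}
	(void) get_props(&p, 1);	/* exercises the cleanup path */
	return (0);
}
```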
- */ - if (nvpair_type(elem) != DATA_TYPE_UINT64) { - error = SET_ERROR(EINVAL); - break; - } - - if (nvpair_value_uint64(elem, &intval) != 0) { - error = SET_ERROR(EINVAL); - break; - } - - if (intval != 0) { - error = SET_ERROR(EINVAL); - break; - } +nvpair_t *elem; +int error = 0, reset_bootfs = 0; +uint64_t objnum = 0; +boolean_t has_feature = B_FALSE; + +elem = NULL; +while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + uint64_t intval; + char *strval, *slash, *check, *fname; + const char *propname = nvpair_name(elem); + zpool_prop_t prop = zpool_name_to_prop(propname); + + switch (prop) { + case ZPOOL_PROP_INVAL: + if (!zpool_prop_feature(propname)) { + error = SET_ERROR(EINVAL); + break; + } - fname = strchr(propname, '@') + 1; - if (zfeature_lookup_name(fname, NULL) != 0) { - error = SET_ERROR(EINVAL); - break; - } + /* + * Sanitize the input. + */ + if (nvpair_type(elem) != DATA_TYPE_UINT64) { + error = SET_ERROR(EINVAL); + break; + } - has_feature = B_TRUE; + if (nvpair_value_uint64(elem, &intval) != 0) { + error = SET_ERROR(EINVAL); break; + } - case ZPOOL_PROP_VERSION: - error = nvpair_value_uint64(elem, &intval); - if (!error && - (intval < spa_version(spa) || - intval > SPA_VERSION_BEFORE_FEATURES || - has_feature)) - error = SET_ERROR(EINVAL); + if (intval != 0) { + error = SET_ERROR(EINVAL); break; + } - case ZPOOL_PROP_DELEGATION: - case ZPOOL_PROP_AUTOREPLACE: - case ZPOOL_PROP_LISTSNAPS: - case ZPOOL_PROP_AUTOEXPAND: - case ZPOOL_PROP_AUTOTRIM: - error = nvpair_value_uint64(elem, &intval); - if (!error && intval > 1) - error = SET_ERROR(EINVAL); + fname = strchr(propname, '@') + 1; + if (zfeature_lookup_name(fname, NULL) != 0) { + error = SET_ERROR(EINVAL); break; + } - case ZPOOL_PROP_MULTIHOST: - error = nvpair_value_uint64(elem, &intval); - if (!error && intval > 1) - error = SET_ERROR(EINVAL); + has_feature = B_TRUE; + break; - if (!error && !spa_get_hostid()) - error = SET_ERROR(ENOTSUP); + case ZPOOL_PROP_VERSION: + error = nvpair_value_uint64(elem, &intval); + if (!error && + (intval < spa_version(spa) || + intval > SPA_VERSION_BEFORE_FEATURES || + has_feature)) + error = SET_ERROR(EINVAL); + break; - break; + case ZPOOL_PROP_DELEGATION: + case ZPOOL_PROP_AUTOREPLACE: + case ZPOOL_PROP_LISTSNAPS: + case ZPOOL_PROP_AUTOEXPAND: + case ZPOOL_PROP_AUTOTRIM: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > 1) + error = SET_ERROR(EINVAL); + break; - case ZPOOL_PROP_BOOTFS: - /* - * If the pool version is less than SPA_VERSION_BOOTFS, - * or the pool is still being created (version == 0), - * the bootfs property cannot be set. - */ - if (spa_version(spa) < SPA_VERSION_BOOTFS) { - error = SET_ERROR(ENOTSUP); - break; - } + case ZPOOL_PROP_MULTIHOST: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > 1) + error = SET_ERROR(EINVAL); - /* - * Make sure the vdev config is bootable - */ - if (!vdev_is_bootable(spa->spa_root_vdev)) { + if (!error) { + uint32_t hostid = zone_get_hostid(NULL); + if (hostid) + spa->spa_hostid = hostid; + else error = SET_ERROR(ENOTSUP); - break; - } + } - reset_bootfs = 1; + break; - error = nvpair_value_string(elem, &strval); + case ZPOOL_PROP_BOOTFS: + /* + * If the pool version is less than SPA_VERSION_BOOTFS, + * or the pool is still being created (version == 0), + * the bootfs property cannot be set. 
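The feature-property branch above accepts only names of the form feature@<name> whose suffix resolves via zfeature_lookup_name(). A standalone sketch of that parse, with a small illustrative table in place of the real feature list:

```c
#include <stdio.h>
#include <string.h>

/* Illustrative short names; stand-ins for zfeature_lookup_name(). */
static const char *features[] = { "async_destroy", "large_blocks", NULL };

/* Accept "feature@<name>" only when <name> is a known feature. */
static int
feature_prop_ok(const char *propname)
{
	const char *at = strchr(propname, '@');

	if (at == NULL || strncmp(propname, "feature@", 8) != 0)
		return (0);
	for (int i = 0; features[i] != NULL; i++)
		if (strcmp(at + 1, features[i]) == 0)
			return (1);
	return (0);
}

int
main(void)
{
	printf("%d\n", feature_prop_ok("feature@async_destroy")); /* 1 */
	printf("%d\n", feature_prop_ok("feature@bogus"));	   /* 0 */
	return (0);
}
```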
+ */ + if (spa_version(spa) < SPA_VERSION_BOOTFS) { + error = SET_ERROR(ENOTSUP); + break; + } - if (!error) { - objset_t *os; - uint64_t propval; + /* + * Make sure the vdev config is bootable + */ + if (!vdev_is_bootable(spa->spa_root_vdev)) { + error = SET_ERROR(ENOTSUP); + break; + } - if (strval == NULL || strval[0] == '\0') { - objnum = zpool_prop_default_numeric( - ZPOOL_PROP_BOOTFS); - break; - } + reset_bootfs = 1; - error = dmu_objset_hold(strval, FTAG, &os); - if (error != 0) - break; + error = nvpair_value_string(elem, &strval); - /* - * Must be ZPL, and its property settings - * must be supported by GRUB (compression - * is not gzip, and large dnodes are not - * used). - */ + if (!error) { + objset_t *os; + uint64_t propval; - if (dmu_objset_type(os) != DMU_OST_ZFS) { - error = SET_ERROR(ENOTSUP); - } else if ((error = - dsl_prop_get_int_ds(dmu_objset_ds(os), - zfs_prop_to_name(ZFS_PROP_COMPRESSION), - &propval)) == 0 && - !BOOTFS_COMPRESS_VALID(propval)) { - error = SET_ERROR(ENOTSUP); - } else if ((error = - dsl_prop_get_int_ds(dmu_objset_ds(os), - zfs_prop_to_name(ZFS_PROP_DNODESIZE), - &propval)) == 0 && - propval != ZFS_DNSIZE_LEGACY) { - error = SET_ERROR(ENOTSUP); - } else { - objnum = dmu_objset_id(os); - } - dmu_objset_rele(os, FTAG); + if (strval == NULL || strval[0] == '\0') { + objnum = zpool_prop_default_numeric( + ZPOOL_PROP_BOOTFS); + break; } - break; - case ZPOOL_PROP_FAILUREMODE: - error = nvpair_value_uint64(elem, &intval); - if (!error && intval > ZIO_FAILURE_MODE_PANIC) - error = SET_ERROR(EINVAL); + error = dmu_objset_hold(strval, FTAG, &os); + if (error != 0) + break; /* - * This is a special case which only occurs when - * the pool has completely failed. This allows - * the user to change the in-core failmode property - * without syncing it out to disk (I/Os might - * currently be blocked). We do this by returning - * EIO to the caller (spa_prop_set) to trick it - * into thinking we encountered a property validation - * error. + * Must be ZPL, and its property settings + * must be supported by GRUB (compression + * is not gzip, and large dnodes are not + * used). */ - if (!error && spa_suspended(spa)) { - spa->spa_failmode = intval; - error = SET_ERROR(EIO); - } - break; - - case ZPOOL_PROP_CACHEFILE: - if ((error = nvpair_value_string(elem, &strval)) != 0) - break; - - if (strval[0] == '\0') - break; - - if (strcmp(strval, "none") == 0) - break; - if (strval[0] != '/') { - error = SET_ERROR(EINVAL); - break; + if (dmu_objset_type(os) != DMU_OST_ZFS) { + error = SET_ERROR(ENOTSUP); + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), + zfs_prop_to_name(ZFS_PROP_COMPRESSION), + &propval)) == 0 && + !BOOTFS_COMPRESS_VALID(propval)) { + error = SET_ERROR(ENOTSUP); + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), + zfs_prop_to_name(ZFS_PROP_DNODESIZE), + &propval)) == 0 && + propval != ZFS_DNSIZE_LEGACY) { + error = SET_ERROR(ENOTSUP); + } else { + objnum = dmu_objset_id(os); } + dmu_objset_rele(os, FTAG); + } + break; - slash = strrchr(strval, '/'); - ASSERT(slash != NULL); + case ZPOOL_PROP_FAILUREMODE: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > ZIO_FAILURE_MODE_PANIC) + error = SET_ERROR(EINVAL); - if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || - strcmp(slash, "/..") == 0) - error = SET_ERROR(EINVAL); + /* + * This is a special case which only occurs when + * the pool has completely failed. 
This allows + * the user to change the in-core failmode property + * without syncing it out to disk (I/Os might + * currently be blocked). We do this by returning + * EIO to the caller (spa_prop_set) to trick it + * into thinking we encountered a property validation + * error. + */ + if (!error && spa_suspended(spa)) { + spa->spa_failmode = intval; + error = SET_ERROR(EIO); + } + break; + + case ZPOOL_PROP_CACHEFILE: + if ((error = nvpair_value_string(elem, &strval)) != 0) break; - case ZPOOL_PROP_COMMENT: - if ((error = nvpair_value_string(elem, &strval)) != 0) - break; - for (check = strval; *check != '\0'; check++) { - if (!isprint(*check)) { - error = SET_ERROR(EINVAL); - break; - } - } - if (strlen(strval) > ZPROP_MAX_COMMENT) - error = SET_ERROR(E2BIG); + if (strval[0] == '\0') break; - case ZPOOL_PROP_DEDUPDITTO: - if (spa_version(spa) < SPA_VERSION_DEDUP) - error = SET_ERROR(ENOTSUP); - else - error = nvpair_value_uint64(elem, &intval); - if (error == 0 && - intval != 0 && intval < ZIO_DEDUPDITTO_MIN) - error = SET_ERROR(EINVAL); + if (strcmp(strval, "none") == 0) break; - default: + if (strval[0] != '/') { + error = SET_ERROR(EINVAL); break; } - if (error) + slash = strrchr(strval, '/'); + ASSERT(slash != NULL); + + if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || + strcmp(slash, "/..") == 0) + error = SET_ERROR(EINVAL); + break; + + case ZPOOL_PROP_COMMENT: + if ((error = nvpair_value_string(elem, &strval)) != 0) break; + for (check = strval; *check != '\0'; check++) { + if (!isprint(*check)) { + error = SET_ERROR(EINVAL); + break; + } + } + if (strlen(strval) > ZPROP_MAX_COMMENT) + error = SET_ERROR(E2BIG); + break; + + case ZPOOL_PROP_DEDUPDITTO: + if (spa_version(spa) < SPA_VERSION_DEDUP) + error = SET_ERROR(ENOTSUP); + else + error = nvpair_value_uint64(elem, &intval); + if (error == 0 && + intval != 0 && intval < ZIO_DEDUPDITTO_MIN) + error = SET_ERROR(EINVAL); + break; + + default: + break; } - if (!error && reset_bootfs) { - error = nvlist_remove(props, - zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); + if (error) + break; +} + +if (!error && reset_bootfs) { + error = nvlist_remove(props, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); - if (!error) { - error = nvlist_add_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); - } + if (!error) { + error = nvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); } +} - return (error); +return (error); } void spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) { - char *cachefile; - spa_config_dirent_t *dp; +char *cachefile; +spa_config_dirent_t *dp; - if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), - &cachefile) != 0) - return; +if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), + &cachefile) != 0) + return; - dp = kmem_alloc(sizeof (spa_config_dirent_t), - KM_SLEEP); +dp = kmem_alloc(sizeof (spa_config_dirent_t), + KM_SLEEP); - if (cachefile[0] == '\0') - dp->scd_path = spa_strdup(spa_config_path); - else if (strcmp(cachefile, "none") == 0) - dp->scd_path = NULL; - else - dp->scd_path = spa_strdup(cachefile); +if (cachefile[0] == '\0') + dp->scd_path = spa_strdup(spa_config_path); +else if (strcmp(cachefile, "none") == 0) + dp->scd_path = NULL; +else + dp->scd_path = spa_strdup(cachefile); - list_insert_head(&spa->spa_config_list, dp); - if (need_sync) - spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); +list_insert_head(&spa->spa_config_list, dp); +if (need_sync) + spa_async_request(spa, 
SPA_ASYNC_CONFIG_UPDATE); } int spa_prop_set(spa_t *spa, nvlist_t *nvp) { - int error; - nvpair_t *elem = NULL; - boolean_t need_sync = B_FALSE; +int error; +nvpair_t *elem = NULL; +boolean_t need_sync = B_FALSE; - if ((error = spa_prop_validate(spa, nvp)) != 0) - return (error); +if ((error = spa_prop_validate(spa, nvp)) != 0) + return (error); - while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { - zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); +while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { + zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); - if (prop == ZPOOL_PROP_CACHEFILE || - prop == ZPOOL_PROP_ALTROOT || - prop == ZPOOL_PROP_READONLY) - continue; - - if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) { - uint64_t ver; + if (prop == ZPOOL_PROP_CACHEFILE || + prop == ZPOOL_PROP_ALTROOT || + prop == ZPOOL_PROP_READONLY) + continue; - if (prop == ZPOOL_PROP_VERSION) { - VERIFY(nvpair_value_uint64(elem, &ver) == 0); - } else { - ASSERT(zpool_prop_feature(nvpair_name(elem))); - ver = SPA_VERSION_FEATURES; - need_sync = B_TRUE; - } + if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) { + uint64_t ver; - /* Save time if the version is already set. */ - if (ver == spa_version(spa)) - continue; + if (prop == ZPOOL_PROP_VERSION) { + VERIFY(nvpair_value_uint64(elem, &ver) == 0); + } else { + ASSERT(zpool_prop_feature(nvpair_name(elem))); + ver = SPA_VERSION_FEATURES; + need_sync = B_TRUE; + } - /* - * In addition to the pool directory object, we might - * create the pool properties object, the features for - * read object, the features for write object, or the - * feature descriptions object. - */ - error = dsl_sync_task(spa->spa_name, NULL, - spa_sync_version, &ver, - 6, ZFS_SPACE_CHECK_RESERVED); - if (error) - return (error); + /* Save time if the version is already set. */ + if (ver == spa_version(spa)) continue; - } - need_sync = B_TRUE; - break; + /* + * In addition to the pool directory object, we might + * create the pool properties object, the features for + * read object, the features for write object, or the + * feature descriptions object. + */ + error = dsl_sync_task(spa->spa_name, NULL, + spa_sync_version, &ver, + 6, ZFS_SPACE_CHECK_RESERVED); + if (error) + return (error); + continue; } - if (need_sync) { - return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, - nvp, 6, ZFS_SPACE_CHECK_RESERVED)); - } + need_sync = B_TRUE; + break; +} - return (0); +if (need_sync) { + return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, + nvp, 6, ZFS_SPACE_CHECK_RESERVED)); +} + +return (0); } /* - * If the bootfs property value is dsobj, clear it. - */ +* If the bootfs property value is dsobj, clear it. 
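The ZPOOL_PROP_CACHEFILE validation in spa_prop_validate() above reduces to a short predicate: empty and "none" are special, and everything else must be an absolute path whose final component is a real name. A standalone mirror of those checks:

```c
#include <stdio.h>
#include <string.h>

/* "" and "none" are special; otherwise require a usable absolute path. */
static int
cachefile_ok(const char *val)
{
	const char *slash;

	if (val[0] == '\0' || strcmp(val, "none") == 0)
		return (1);
	if (val[0] != '/')
		return (0);
	slash = strrchr(val, '/');
	if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
	    strcmp(slash, "/..") == 0)
		return (0);
	return (1);
}

int
main(void)
{
	printf("%d\n", cachefile_ok("/etc/zfs/zpool.cache"));	/* 1 */
	printf("%d\n", cachefile_ok("relative/path"));		/* 0 */
	printf("%d\n", cachefile_ok("/tmp/"));			/* 0 */
	return (0);
}
```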
+*/ void spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) { - if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { - VERIFY(zap_remove(spa->spa_meta_objset, - spa->spa_pool_props_object, - zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); - spa->spa_bootfs = 0; - } +if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { + VERIFY(zap_remove(spa->spa_meta_objset, + spa->spa_pool_props_object, + zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); + spa->spa_bootfs = 0; +} } /*ARGSUSED*/ static int spa_change_guid_check(void *arg, dmu_tx_t *tx) { - ASSERTV(uint64_t *newguid = arg); - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - vdev_t *rvd = spa->spa_root_vdev; - uint64_t vdev_state; +ASSERTV(uint64_t *newguid = arg); +spa_t *spa = dmu_tx_pool(tx)->dp_spa; +vdev_t *rvd = spa->spa_root_vdev; +uint64_t vdev_state; - if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { - int error = (spa_has_checkpoint(spa)) ? - ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; - return (SET_ERROR(error)); - } +if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + int error = (spa_has_checkpoint(spa)) ? + ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (SET_ERROR(error)); +} - spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - vdev_state = rvd->vdev_state; - spa_config_exit(spa, SCL_STATE, FTAG); +spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); +vdev_state = rvd->vdev_state; +spa_config_exit(spa, SCL_STATE, FTAG); - if (vdev_state != VDEV_STATE_HEALTHY) - return (SET_ERROR(ENXIO)); +if (vdev_state != VDEV_STATE_HEALTHY) + return (SET_ERROR(ENXIO)); - ASSERT3U(spa_guid(spa), !=, *newguid); +ASSERT3U(spa_guid(spa), !=, *newguid); - return (0); +return (0); } static void spa_change_guid_sync(void *arg, dmu_tx_t *tx) { - uint64_t *newguid = arg; - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - uint64_t oldguid; - vdev_t *rvd = spa->spa_root_vdev; +uint64_t *newguid = arg; +spa_t *spa = dmu_tx_pool(tx)->dp_spa; +uint64_t oldguid; +vdev_t *rvd = spa->spa_root_vdev; - oldguid = spa_guid(spa); +oldguid = spa_guid(spa); - spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - rvd->vdev_guid = *newguid; - rvd->vdev_guid_sum += (*newguid - oldguid); - vdev_config_dirty(rvd); - spa_config_exit(spa, SCL_STATE, FTAG); +spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); +rvd->vdev_guid = *newguid; +rvd->vdev_guid_sum += (*newguid - oldguid); +vdev_config_dirty(rvd); +spa_config_exit(spa, SCL_STATE, FTAG); - spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", - oldguid, *newguid); +spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", + oldguid, *newguid); } /* - * Change the GUID for the pool. This is done so that we can later - * re-import a pool built from a clone of our own vdevs. We will modify - * the root vdev's guid, our own pool guid, and then mark all of our - * vdevs dirty. Note that we must make sure that all our vdevs are - * online when we do this, or else any vdevs that weren't present - * would be orphaned from our pool. We are also going to issue a - * sysevent to update any watchers. - */ +* Change the GUID for the pool. This is done so that we can later +* re-import a pool built from a clone of our own vdevs. We will modify +* the root vdev's guid, our own pool guid, and then mark all of our +* vdevs dirty. Note that we must make sure that all our vdevs are +* online when we do this, or else any vdevs that weren't present +* would be orphaned from our pool. 
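spa_change_guid() splits the update into a side-effect-free check callback and an apply callback, both run by dsl_sync_task() in syncing context. A minimal userland sketch of that check/apply split, with illustrative error choices and a pthread mutex where the real code relies on the sync task and the SCL_STATE lock:

```c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long guid = 0xdeadbeef;
static int healthy = 1;

/* Phase 1: validate only, no side effects (cf. spa_change_guid_check). */
static int
change_check(unsigned long long newguid)
{
	if (!healthy)
		return (ENXIO);
	if (newguid == guid)	/* illustrative stand-in for the ASSERT */
		return (EEXIST);
	return (0);
}

/* Phase 2: apply while holding the lock (cf. spa_change_guid_sync). */
static void
change_sync(unsigned long long newguid)
{
	printf("guid change old=%llu new=%llu\n", guid, newguid);
	guid = newguid;
}

static int
change_guid(unsigned long long newguid)
{
	int err;

	pthread_mutex_lock(&lock);
	if ((err = change_check(newguid)) == 0)
		change_sync(newguid);
	pthread_mutex_unlock(&lock);
	return (err);
}

int
main(void)
{
	return (change_guid(42));
}
```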
We are also going to issue a +* sysevent to update any watchers. +*/ int spa_change_guid(spa_t *spa) { - int error; - uint64_t guid; +int error; +uint64_t guid; - mutex_enter(&spa->spa_vdev_top_lock); - mutex_enter(&spa_namespace_lock); - guid = spa_generate_guid(NULL); +mutex_enter(&spa->spa_vdev_top_lock); +mutex_enter(&spa_namespace_lock); +guid = spa_generate_guid(NULL); - error = dsl_sync_task(spa->spa_name, spa_change_guid_check, - spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); +error = dsl_sync_task(spa->spa_name, spa_change_guid_check, + spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); - if (error == 0) { - spa_write_cachefile(spa, B_FALSE, B_TRUE); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); - } +if (error == 0) { + spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); +} - mutex_exit(&spa_namespace_lock); - mutex_exit(&spa->spa_vdev_top_lock); +mutex_exit(&spa_namespace_lock); +mutex_exit(&spa->spa_vdev_top_lock); - return (error); +return (error); } /* - * ========================================================================== - * SPA state manipulation (open/create/destroy/import/export) - * ========================================================================== - */ +* ========================================================================== +* SPA state manipulation (open/create/destroy/import/export) +* ========================================================================== +*/ static int spa_error_entry_compare(const void *a, const void *b) { - const spa_error_entry_t *sa = (const spa_error_entry_t *)a; - const spa_error_entry_t *sb = (const spa_error_entry_t *)b; - int ret; +const spa_error_entry_t *sa = (const spa_error_entry_t *)a; +const spa_error_entry_t *sb = (const spa_error_entry_t *)b; +int ret; - ret = memcmp(&sa->se_bookmark, &sb->se_bookmark, - sizeof (zbookmark_phys_t)); +ret = memcmp(&sa->se_bookmark, &sb->se_bookmark, + sizeof (zbookmark_phys_t)); - return (AVL_ISIGN(ret)); +return (AVL_ISIGN(ret)); } /* - * Utility function which retrieves copies of the current logs and - * re-initializes them in the process. - */ +* Utility function which retrieves copies of the current logs and +* re-initializes them in the process. 
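spa_error_entry_compare() collapses an arbitrary memcmp() result to -1/0/+1 with AVL_ISIGN, since the AVL code (like qsort) only cares about the sign. The same normalization in a standalone comparator:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry { unsigned char key[8]; };

/* Collapse memcmp()'s result to -1/0/+1, as AVL_ISIGN does. */
static int
entry_compare(const void *a, const void *b)
{
	const struct entry *ea = a, *eb = b;
	int ret = memcmp(ea->key, eb->key, sizeof (ea->key));

	return ((ret > 0) - (ret < 0));
}

int
main(void)
{
	struct entry e[3] = { { { 3 } }, { { 1 } }, { { 2 } } };

	qsort(e, 3, sizeof (e[0]), entry_compare);
	for (int i = 0; i < 3; i++)
		printf("%d\n", e[i].key[0]);	/* prints 1 2 3 */
	return (0);
}
```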
+*/ void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) { - ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); +ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); - bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); - bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); +bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); +bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); - avl_create(&spa->spa_errlist_scrub, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); - avl_create(&spa->spa_errlist_last, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_scrub, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_last, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); } static void spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { - const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; - enum zti_modes mode = ztip->zti_mode; - uint_t value = ztip->zti_value; - uint_t count = ztip->zti_count; - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - uint_t flags = 0; - boolean_t batch = B_FALSE; - - if (mode == ZTI_MODE_NULL) { - tqs->stqs_count = 0; - tqs->stqs_taskq = NULL; - return; - } +const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; +enum zti_modes mode = ztip->zti_mode; +uint_t value = ztip->zti_value; +uint_t count = ztip->zti_count; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +uint_t flags = 0; +boolean_t batch = B_FALSE; - ASSERT3U(count, >, 0); +if (mode == ZTI_MODE_NULL) { + tqs->stqs_count = 0; + tqs->stqs_taskq = NULL; + return; +} - tqs->stqs_count = count; - tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); +ASSERT3U(count, >, 0); - switch (mode) { - case ZTI_MODE_FIXED: - ASSERT3U(value, >=, 1); - value = MAX(value, 1); - flags |= TASKQ_DYNAMIC; - break; +tqs->stqs_count = count; +tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); - case ZTI_MODE_BATCH: - batch = B_TRUE; - flags |= TASKQ_THREADS_CPU_PCT; - value = MIN(zio_taskq_batch_pct, 100); - break; +switch (mode) { +case ZTI_MODE_FIXED: + ASSERT3U(value, >=, 1); + value = MAX(value, 1); + flags |= TASKQ_DYNAMIC; + break; - default: - panic("unrecognized mode for %s_%s taskq (%u:%u) in " - "spa_activate()", - zio_type_name[t], zio_taskq_types[q], mode, value); - break; - } +case ZTI_MODE_BATCH: + batch = B_TRUE; + flags |= TASKQ_THREADS_CPU_PCT; + value = MIN(zio_taskq_batch_pct, 100); + break; - for (uint_t i = 0; i < count; i++) { - taskq_t *tq; - char name[32]; +default: + panic("unrecognized mode for %s_%s taskq (%u:%u) in " + "spa_activate()", + zio_type_name[t], zio_taskq_types[q], mode, value); + break; +} - (void) snprintf(name, sizeof (name), "%s_%s", - zio_type_name[t], zio_taskq_types[q]); +for (uint_t i = 0; i < count; i++) { + taskq_t *tq; + char name[32]; - if (zio_taskq_sysdc && spa->spa_proc != &p0) { - if (batch) - flags |= TASKQ_DC_BATCH; + (void) snprintf(name, sizeof (name), "%s_%s", + zio_type_name[t], zio_taskq_types[q]); - tq = taskq_create_sysdc(name, value, 50, INT_MAX, - spa->spa_proc, zio_taskq_basedc, flags); - } else { - pri_t pri = maxclsyspri; - /* - * The write issue taskq can be extremely CPU - * intensive. Run it at slightly less important - * priority than the other taskqs. Under Linux this - * means incrementing the priority value on platforms - * like illumos it should be decremented. 
- */ - if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) - pri++; + if (zio_taskq_sysdc && spa->spa_proc != &p0) { + if (batch) + flags |= TASKQ_DC_BATCH; - tq = taskq_create_proc(name, value, pri, 50, - INT_MAX, spa->spa_proc, flags); - } + tq = taskq_create_sysdc(name, value, 50, INT_MAX, + spa->spa_proc, zio_taskq_basedc, flags); + } else { + pri_t pri = maxclsyspri; + /* + * The write issue taskq can be extremely CPU + * intensive. Run it at slightly less important + * priority than the other taskqs. Under Linux this + * means incrementing the priority value on platforms + * like illumos it should be decremented. + */ + if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) + pri++; - tqs->stqs_taskq[i] = tq; + tq = taskq_create_proc(name, value, pri, 50, + INT_MAX, spa->spa_proc, flags); } + + tqs->stqs_taskq[i] = tq; +} } static void spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - if (tqs->stqs_taskq == NULL) { - ASSERT3U(tqs->stqs_count, ==, 0); - return; - } +if (tqs->stqs_taskq == NULL) { + ASSERT3U(tqs->stqs_count, ==, 0); + return; +} - for (uint_t i = 0; i < tqs->stqs_count; i++) { - ASSERT3P(tqs->stqs_taskq[i], !=, NULL); - taskq_destroy(tqs->stqs_taskq[i]); - } +for (uint_t i = 0; i < tqs->stqs_count; i++) { + ASSERT3P(tqs->stqs_taskq[i], !=, NULL); + taskq_destroy(tqs->stqs_taskq[i]); +} - kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); - tqs->stqs_taskq = NULL; +kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); +tqs->stqs_taskq = NULL; } /* - * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. - * Note that a type may have multiple discrete taskqs to avoid lock contention - * on the taskq itself. In that case we choose which taskq at random by using - * the low bits of gethrtime(). - */ +* Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. +* Note that a type may have multiple discrete taskqs to avoid lock contention +* on the taskq itself. In that case we choose which taskq at random by using +* the low bits of gethrtime(). +*/ void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) +task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) { - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - taskq_t *tq; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +taskq_t *tq; - ASSERT3P(tqs->stqs_taskq, !=, NULL); - ASSERT3U(tqs->stqs_count, !=, 0); +ASSERT3P(tqs->stqs_taskq, !=, NULL); +ASSERT3U(tqs->stqs_count, !=, 0); - if (tqs->stqs_count == 1) { - tq = tqs->stqs_taskq[0]; - } else { - tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; - } +if (tqs->stqs_count == 1) { + tq = tqs->stqs_taskq[0]; +} else { + tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; +} - taskq_dispatch_ent(tq, func, arg, flags, ent); +taskq_dispatch_ent(tq, func, arg, flags, ent); } /* - * Same as spa_taskq_dispatch_ent() but block on the task until completion. - */ +* Same as spa_taskq_dispatch_ent() but block on the task until completion. 
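The dispatch path above spreads work across multiple taskqs by taking the low bits of gethrtime() modulo the taskq count. A rough userspace analog using CLOCK_MONOTONIC; the spread is only approximately uniform, which is all the real code needs:

```c
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/*
 * Cheap spreader: low bits of a nanosecond clock modulo the queue
 * count, as spa_taskq_dispatch_ent() does with gethrtime().
 */
static unsigned
pick_taskq(unsigned count)
{
	struct timespec ts;

	if (count == 1)
		return (0);
	(void) clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_nsec % count);
}

int
main(void)
{
	unsigned hist[8] = { 0 };

	for (int i = 0; i < 100000; i++)
		hist[pick_taskq(8)]++;
	for (int i = 0; i < 8; i++)
		printf("tq %d: %u\n", i, hist[i]);
	return (0);
}
```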
+*/ void spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q, - task_func_t *func, void *arg, uint_t flags) +task_func_t *func, void *arg, uint_t flags) { - spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - taskq_t *tq; - taskqid_t id; +spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; +taskq_t *tq; +taskqid_t id; - ASSERT3P(tqs->stqs_taskq, !=, NULL); - ASSERT3U(tqs->stqs_count, !=, 0); +ASSERT3P(tqs->stqs_taskq, !=, NULL); +ASSERT3U(tqs->stqs_count, !=, 0); - if (tqs->stqs_count == 1) { - tq = tqs->stqs_taskq[0]; - } else { - tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; - } +if (tqs->stqs_count == 1) { + tq = tqs->stqs_taskq[0]; +} else { + tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; +} - id = taskq_dispatch(tq, func, arg, flags); - if (id) - taskq_wait_id(tq, id); +id = taskq_dispatch(tq, func, arg, flags); +if (id) + taskq_wait_id(tq, id); } static void spa_create_zio_taskqs(spa_t *spa) { - for (int t = 0; t < ZIO_TYPES; t++) { - for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { - spa_taskqs_init(spa, t, q); - } +for (int t = 0; t < ZIO_TYPES; t++) { + for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { + spa_taskqs_init(spa, t, q); } } +} /* - * Disabled until spa_thread() can be adapted for Linux. - */ +* Disabled until spa_thread() can be adapted for Linux. +*/ #undef HAVE_SPA_THREAD #if defined(_KERNEL) && defined(HAVE_SPA_THREAD) static void spa_thread(void *arg) { - psetid_t zio_taskq_psrset_bind = PS_NONE; - callb_cpr_t cprinfo; - - spa_t *spa = arg; - user_t *pu = PTOU(curproc); +psetid_t zio_taskq_psrset_bind = PS_NONE; +callb_cpr_t cprinfo; - CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, - spa->spa_name); +spa_t *spa = arg; +user_t *pu = PTOU(curproc); - ASSERT(curproc != &p0); - (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), - "zpool-%s", spa->spa_name); - (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); +CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, + spa->spa_name); - /* bind this thread to the requested psrset */ - if (zio_taskq_psrset_bind != PS_NONE) { - pool_lock(); - mutex_enter(&cpu_lock); - mutex_enter(&pidlock); - mutex_enter(&curproc->p_lock); +ASSERT(curproc != &p0); +(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), + "zpool-%s", spa->spa_name); +(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); - if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, - 0, NULL, NULL) == 0) { - curthread->t_bind_pset = zio_taskq_psrset_bind; - } else { - cmn_err(CE_WARN, - "Couldn't bind process for zfs pool \"%s\" to " - "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); - } +/* bind this thread to the requested psrset */ +if (zio_taskq_psrset_bind != PS_NONE) { + pool_lock(); + mutex_enter(&cpu_lock); + mutex_enter(&pidlock); + mutex_enter(&curproc->p_lock); - mutex_exit(&curproc->p_lock); - mutex_exit(&pidlock); - mutex_exit(&cpu_lock); - pool_unlock(); + if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, + 0, NULL, NULL) == 0) { + curthread->t_bind_pset = zio_taskq_psrset_bind; + } else { + cmn_err(CE_WARN, + "Couldn't bind process for zfs pool \"%s\" to " + "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); } - if (zio_taskq_sysdc) { - sysdc_thread_enter(curthread, 100, 0); - } + mutex_exit(&curproc->p_lock); + mutex_exit(&pidlock); + mutex_exit(&cpu_lock); + pool_unlock(); +} - spa->spa_proc = curproc; - spa->spa_did = curthread->t_did; +if (zio_taskq_sysdc) { + sysdc_thread_enter(curthread, 100, 0); +} - spa_create_zio_taskqs(spa); +spa->spa_proc = 
curproc; +spa->spa_did = curthread->t_did; - mutex_enter(&spa->spa_proc_lock); - ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); +spa_create_zio_taskqs(spa); - spa->spa_proc_state = SPA_PROC_ACTIVE; - cv_broadcast(&spa->spa_proc_cv); +mutex_enter(&spa->spa_proc_lock); +ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); - CALLB_CPR_SAFE_BEGIN(&cprinfo); - while (spa->spa_proc_state == SPA_PROC_ACTIVE) - cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); - CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); +spa->spa_proc_state = SPA_PROC_ACTIVE; +cv_broadcast(&spa->spa_proc_cv); - ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); - spa->spa_proc_state = SPA_PROC_GONE; - spa->spa_proc = &p0; - cv_broadcast(&spa->spa_proc_cv); - CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ +CALLB_CPR_SAFE_BEGIN(&cprinfo); +while (spa->spa_proc_state == SPA_PROC_ACTIVE) + cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); +CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); - mutex_enter(&curproc->p_lock); - lwp_exit(); +ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); +spa->spa_proc_state = SPA_PROC_GONE; +spa->spa_proc = &p0; +cv_broadcast(&spa->spa_proc_cv); +CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ + +mutex_enter(&curproc->p_lock); +lwp_exit(); } #endif /* - * Activate an uninitialized pool. - */ +* Activate an uninitialized pool. +*/ static void spa_activate(spa_t *spa, int mode) { - ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); +ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); - spa->spa_state = POOL_STATE_ACTIVE; - spa->spa_mode = mode; +spa->spa_state = POOL_STATE_ACTIVE; +spa->spa_mode = mode; - spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); - spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); - spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops); - spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops); +spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops); - /* Try to create a covering process */ - mutex_enter(&spa->spa_proc_lock); - ASSERT(spa->spa_proc_state == SPA_PROC_NONE); - ASSERT(spa->spa_proc == &p0); - spa->spa_did = 0; +/* Try to create a covering process */ +mutex_enter(&spa->spa_proc_lock); +ASSERT(spa->spa_proc_state == SPA_PROC_NONE); +ASSERT(spa->spa_proc == &p0); +spa->spa_did = 0; #ifdef HAVE_SPA_THREAD - /* Only create a process if we're going to be around a while. */ - if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { - if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, - NULL, 0) == 0) { - spa->spa_proc_state = SPA_PROC_CREATED; - while (spa->spa_proc_state == SPA_PROC_CREATED) { - cv_wait(&spa->spa_proc_cv, - &spa->spa_proc_lock); - } - ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); - ASSERT(spa->spa_proc != &p0); - ASSERT(spa->spa_did != 0); - } else { +/* Only create a process if we're going to be around a while. 
*/ +if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { + if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, + NULL, 0) == 0) { + spa->spa_proc_state = SPA_PROC_CREATED; + while (spa->spa_proc_state == SPA_PROC_CREATED) { + cv_wait(&spa->spa_proc_cv, + &spa->spa_proc_lock); + } + ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); + ASSERT(spa->spa_proc != &p0); + ASSERT(spa->spa_did != 0); + } else { #ifdef _KERNEL - cmn_err(CE_WARN, - "Couldn't create process for zfs pool \"%s\"\n", - spa->spa_name); + cmn_err(CE_WARN, + "Couldn't create process for zfs pool \"%s\"\n", + spa->spa_name); #endif - } } +} #endif /* HAVE_SPA_THREAD */ - mutex_exit(&spa->spa_proc_lock); +mutex_exit(&spa->spa_proc_lock); - /* If we didn't create a process, we need to create our taskqs. */ - if (spa->spa_proc == &p0) { - spa_create_zio_taskqs(spa); - } +/* If we didn't create a process, we need to create our taskqs. */ +if (spa->spa_proc == &p0) { + spa_create_zio_taskqs(spa); +} - for (size_t i = 0; i < TXG_SIZE; i++) { - spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL); - } +for (size_t i = 0; i < TXG_SIZE; i++) { + spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL); +} - list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), - offsetof(vdev_t, vdev_config_dirty_node)); - list_create(&spa->spa_evicting_os_list, sizeof (objset_t), - offsetof(objset_t, os_evicting_node)); - list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), - offsetof(vdev_t, vdev_state_dirty_node)); +list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), + offsetof(vdev_t, vdev_config_dirty_node)); +list_create(&spa->spa_evicting_os_list, sizeof (objset_t), + offsetof(objset_t, os_evicting_node)); +list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), + offsetof(vdev_t, vdev_state_dirty_node)); - txg_list_create(&spa->spa_vdev_txg_list, spa, - offsetof(struct vdev, vdev_txg_node)); +txg_list_create(&spa->spa_vdev_txg_list, spa, + offsetof(struct vdev, vdev_txg_node)); - avl_create(&spa->spa_errlist_scrub, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); - avl_create(&spa->spa_errlist_last, - spa_error_entry_compare, sizeof (spa_error_entry_t), - offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_scrub, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); +avl_create(&spa->spa_errlist_last, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); - spa_keystore_init(&spa->spa_keystore); +spa_keystore_init(&spa->spa_keystore); - /* - * This taskq is used to perform zvol-minor-related tasks - * asynchronously. This has several advantages, including easy - * resolution of various deadlocks (zfsonlinux bug #3681). - * - * The taskq must be single threaded to ensure tasks are always - * processed in the order in which they were dispatched. - * - * A taskq per pool allows one to keep the pools independent. - * This way if one pool is suspended, it will not impact another. - * - * The preferred location to dispatch a zvol minor task is a sync - * task. In this context, there is easy access to the spa_t and minimal - * error handling is required because the sync task must succeed. - */ - spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, - 1, INT_MAX, 0); +/* + * This taskq is used to perform zvol-minor-related tasks + * asynchronously. 
This has several advantages, including easy + * resolution of various deadlocks (zfsonlinux bug #3681). + * + * The taskq must be single threaded to ensure tasks are always + * processed in the order in which they were dispatched. + * + * A taskq per pool allows one to keep the pools independent. + * This way if one pool is suspended, it will not impact another. + * + * The preferred location to dispatch a zvol minor task is a sync + * task. In this context, there is easy access to the spa_t and minimal + * error handling is required because the sync task must succeed. + */ +spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, + 1, INT_MAX, 0); - /* - * Taskq dedicated to prefetcher threads: this is used to prevent the - * pool traverse code from monopolizing the global (and limited) - * system_taskq by inappropriately scheduling long running tasks on it. - */ - spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus, - defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); +/* + * Taskq dedicated to prefetcher threads: this is used to prevent the + * pool traverse code from monopolizing the global (and limited) + * system_taskq by inappropriately scheduling long running tasks on it. + */ +spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); - /* - * The taskq to upgrade datasets in this pool. Currently used by - * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA. - */ - spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus, - defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); +/* + * The taskq to upgrade datasets in this pool. Currently used by + * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA. + */ +spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); } /* - * Opposite of spa_activate(). - */ +* Opposite of spa_activate(). 
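The ordering guarantee called out for the z_zvol taskq (single threaded, so tasks always run in dispatch order) is easy to see in a one-worker FIFO. This pthread sketch is an analog of that property, not the SPL taskq implementation:

```c
#include <pthread.h>
#include <stdio.h>

/* One worker draining a FIFO: tasks run strictly in dispatch order. */
#define	NTASKS	4

static int queue[NTASKS];
static int head, tail;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t qcv = PTHREAD_COND_INITIALIZER;

static void
dispatch(int id)
{
	pthread_mutex_lock(&qlock);
	queue[tail++] = id;
	pthread_cond_signal(&qcv);
	pthread_mutex_unlock(&qlock);
}

static void *
worker(void *arg)
{
	for (int done = 0; done < NTASKS; done++) {
		pthread_mutex_lock(&qlock);
		while (head == tail)
			pthread_cond_wait(&qcv, &qlock);
		int id = queue[head++];
		pthread_mutex_unlock(&qlock);
		printf("task %d\n", id);	/* always 0, 1, 2, 3 */
	}
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	for (int i = 0; i < NTASKS; i++)
		dispatch(i);
	pthread_join(t, NULL);
	return (0);
}
```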
+*/ static void spa_deactivate(spa_t *spa) { - ASSERT(spa->spa_sync_on == B_FALSE); - ASSERT(spa->spa_dsl_pool == NULL); - ASSERT(spa->spa_root_vdev == NULL); - ASSERT(spa->spa_async_zio_root == NULL); - ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); - - spa_evicting_os_wait(spa); +ASSERT(spa->spa_sync_on == B_FALSE); +ASSERT(spa->spa_dsl_pool == NULL); +ASSERT(spa->spa_root_vdev == NULL); +ASSERT(spa->spa_async_zio_root == NULL); +ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); - if (spa->spa_zvol_taskq) { - taskq_destroy(spa->spa_zvol_taskq); - spa->spa_zvol_taskq = NULL; - } +spa_evicting_os_wait(spa); - if (spa->spa_prefetch_taskq) { - taskq_destroy(spa->spa_prefetch_taskq); - spa->spa_prefetch_taskq = NULL; - } +if (spa->spa_zvol_taskq) { + taskq_destroy(spa->spa_zvol_taskq); + spa->spa_zvol_taskq = NULL; +} - if (spa->spa_upgrade_taskq) { - taskq_destroy(spa->spa_upgrade_taskq); - spa->spa_upgrade_taskq = NULL; - } +if (spa->spa_prefetch_taskq) { + taskq_destroy(spa->spa_prefetch_taskq); + spa->spa_prefetch_taskq = NULL; +} - txg_list_destroy(&spa->spa_vdev_txg_list); +if (spa->spa_upgrade_taskq) { + taskq_destroy(spa->spa_upgrade_taskq); + spa->spa_upgrade_taskq = NULL; +} - list_destroy(&spa->spa_config_dirty_list); - list_destroy(&spa->spa_evicting_os_list); - list_destroy(&spa->spa_state_dirty_list); +txg_list_destroy(&spa->spa_vdev_txg_list); - taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); +list_destroy(&spa->spa_config_dirty_list); +list_destroy(&spa->spa_evicting_os_list); +list_destroy(&spa->spa_state_dirty_list); - for (int t = 0; t < ZIO_TYPES; t++) { - for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { - spa_taskqs_fini(spa, t, q); - } - } +taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); - for (size_t i = 0; i < TXG_SIZE; i++) { - ASSERT3P(spa->spa_txg_zio[i], !=, NULL); - VERIFY0(zio_wait(spa->spa_txg_zio[i])); - spa->spa_txg_zio[i] = NULL; +for (int t = 0; t < ZIO_TYPES; t++) { + for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { + spa_taskqs_fini(spa, t, q); } +} - metaslab_class_destroy(spa->spa_normal_class); - spa->spa_normal_class = NULL; +for (size_t i = 0; i < TXG_SIZE; i++) { + ASSERT3P(spa->spa_txg_zio[i], !=, NULL); + VERIFY0(zio_wait(spa->spa_txg_zio[i])); + spa->spa_txg_zio[i] = NULL; +} - metaslab_class_destroy(spa->spa_log_class); - spa->spa_log_class = NULL; +metaslab_class_destroy(spa->spa_normal_class); +spa->spa_normal_class = NULL; - metaslab_class_destroy(spa->spa_special_class); - spa->spa_special_class = NULL; +metaslab_class_destroy(spa->spa_log_class); +spa->spa_log_class = NULL; - metaslab_class_destroy(spa->spa_dedup_class); - spa->spa_dedup_class = NULL; +metaslab_class_destroy(spa->spa_special_class); +spa->spa_special_class = NULL; - /* - * If this was part of an import or the open otherwise failed, we may - * still have errors left in the queues. Empty them just in case. - */ - spa_errlog_drain(spa); - avl_destroy(&spa->spa_errlist_scrub); - avl_destroy(&spa->spa_errlist_last); +metaslab_class_destroy(spa->spa_dedup_class); +spa->spa_dedup_class = NULL; - spa_keystore_fini(&spa->spa_keystore); +/* + * If this was part of an import or the open otherwise failed, we may + * still have errors left in the queues. Empty them just in case. 
+ */ +spa_errlog_drain(spa); +avl_destroy(&spa->spa_errlist_scrub); +avl_destroy(&spa->spa_errlist_last); - spa->spa_state = POOL_STATE_UNINITIALIZED; +spa_keystore_fini(&spa->spa_keystore); - mutex_enter(&spa->spa_proc_lock); - if (spa->spa_proc_state != SPA_PROC_NONE) { - ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); - spa->spa_proc_state = SPA_PROC_DEACTIVATE; - cv_broadcast(&spa->spa_proc_cv); - while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { - ASSERT(spa->spa_proc != &p0); - cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); - } - ASSERT(spa->spa_proc_state == SPA_PROC_GONE); - spa->spa_proc_state = SPA_PROC_NONE; - } - ASSERT(spa->spa_proc == &p0); - mutex_exit(&spa->spa_proc_lock); +spa->spa_state = POOL_STATE_UNINITIALIZED; - /* - * We want to make sure spa_thread() has actually exited the ZFS - * module, so that the module can't be unloaded out from underneath - * it. - */ - if (spa->spa_did != 0) { - thread_join(spa->spa_did); - spa->spa_did = 0; +mutex_enter(&spa->spa_proc_lock); +if (spa->spa_proc_state != SPA_PROC_NONE) { + ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); + spa->spa_proc_state = SPA_PROC_DEACTIVATE; + cv_broadcast(&spa->spa_proc_cv); + while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { + ASSERT(spa->spa_proc != &p0); + cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } + ASSERT(spa->spa_proc_state == SPA_PROC_GONE); + spa->spa_proc_state = SPA_PROC_NONE; } +ASSERT(spa->spa_proc == &p0); +mutex_exit(&spa->spa_proc_lock); /* - * Verify a pool configuration, and construct the vdev tree appropriately. This - * will create all the necessary vdevs in the appropriate layout, with each vdev - * in the CLOSED state. This will prep the pool before open/creation/import. - * All vdev validation is done by the vdev_alloc() routine. + * We want to make sure spa_thread() has actually exited the ZFS + * module, so that the module can't be unloaded out from underneath + * it. */ +if (spa->spa_did != 0) { + thread_join(spa->spa_did); + spa->spa_did = 0; +} +} + +/* +* Verify a pool configuration, and construct the vdev tree appropriately. This +* will create all the necessary vdevs in the appropriate layout, with each vdev +* in the CLOSED state. This will prep the pool before open/creation/import. +* All vdev validation is done by the vdev_alloc() routine. 
+*/ static int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, - uint_t id, int atype) +uint_t id, int atype) { - nvlist_t **child; - uint_t children; - int error; +nvlist_t **child; +uint_t children; +int error; - if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) - return (error); +if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) + return (error); - if ((*vdp)->vdev_ops->vdev_op_leaf) - return (0); +if ((*vdp)->vdev_ops->vdev_op_leaf) + return (0); - error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children); +error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); - if (error == ENOENT) - return (0); +if (error == ENOENT) + return (0); - if (error) { +if (error) { + vdev_free(*vdp); + *vdp = NULL; + return (SET_ERROR(EINVAL)); +} + +for (int c = 0; c < children; c++) { + vdev_t *vd; + if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, + atype)) != 0) { vdev_free(*vdp); *vdp = NULL; - return (SET_ERROR(EINVAL)); - } - - for (int c = 0; c < children; c++) { - vdev_t *vd; - if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, - atype)) != 0) { - vdev_free(*vdp); - *vdp = NULL; - return (error); - } + return (error); } +} - ASSERT(*vdp != NULL); +ASSERT(*vdp != NULL); - return (0); +return (0); } /* - * Opposite of spa_load(). - */ +* Opposite of spa_load(). +*/ static void spa_unload(spa_t *spa) { - int i; +int i; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa_import_progress_remove(spa_guid(spa)); - spa_load_note(spa, "UNLOADING"); +spa_import_progress_remove(spa_guid(spa)); +spa_load_note(spa, "UNLOADING"); - /* - * Stop async tasks. - */ - spa_async_suspend(spa); +/* + * Stop async tasks. + */ +spa_async_suspend(spa); - if (spa->spa_root_vdev) { - vdev_t *root_vdev = spa->spa_root_vdev; - vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE); - vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); - vdev_autotrim_stop_all(spa); - } +if (spa->spa_root_vdev) { + vdev_t *root_vdev = spa->spa_root_vdev; + vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE); + vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); + vdev_autotrim_stop_all(spa); +} - /* - * Stop syncing. - */ - if (spa->spa_sync_on) { - txg_sync_stop(spa->spa_dsl_pool); - spa->spa_sync_on = B_FALSE; - } +/* + * Stop syncing. + */ +if (spa->spa_sync_on) { + txg_sync_stop(spa->spa_dsl_pool); + spa->spa_sync_on = B_FALSE; +} - /* - * Even though vdev_free() also calls vdev_metaslab_fini, we need - * to call it earlier, before we wait for async i/o to complete. - * This ensures that there is no async metaslab prefetching, by - * calling taskq_wait(mg_taskq). - */ - if (spa->spa_root_vdev != NULL) { - spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); - for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) - vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]); - spa_config_exit(spa, SCL_ALL, spa); - } +/* + * Even though vdev_free() also calls vdev_metaslab_fini, we need + * to call it earlier, before we wait for async i/o to complete. + * This ensures that there is no async metaslab prefetching, by + * calling taskq_wait(mg_taskq). 
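
spa_config_parse() above follows a typical recursive build-with-unwind shape: allocate a node, recurse over ZPOOL_CONFIG_CHILDREN, and on any child failure free the partially constructed subtree before propagating the error. A compilable toy version of the same shape, using illustrative types rather than the kernel's:

```c
#include <stdlib.h>

typedef struct tnode {
	struct tnode **tn_child;
	int tn_children;
} tnode_t;

/* Free a (possibly partial) subtree; NULL slots are skipped. */
static void
tnode_free(tnode_t *tn)
{
	if (tn->tn_child != NULL) {
		for (int c = 0; c < tn->tn_children; c++)
			if (tn->tn_child[c] != NULL)
				tnode_free(tn->tn_child[c]);
		free(tn->tn_child);
	}
	free(tn);
}

/* Build a tree of the given depth; on failure, unwind and return -1. */
static int
tnode_parse(tnode_t **tnp, int depth)
{
	tnode_t *tn = calloc(1, sizeof (*tn));

	*tnp = tn;
	if (tn == NULL)
		return (-1);
	if (depth == 0)			/* a leaf has no children */
		return (0);

	tn->tn_children = 2;
	tn->tn_child = calloc(tn->tn_children, sizeof (tnode_t *));
	if (tn->tn_child == NULL) {
		tnode_free(tn);
		*tnp = NULL;
		return (-1);
	}
	for (int c = 0; c < tn->tn_children; c++) {
		if (tnode_parse(&tn->tn_child[c], depth - 1) != 0) {
			tnode_free(tn);	/* frees siblings built so far */
			*tnp = NULL;
			return (-1);
		}
	}
	return (0);
}
```

The key detail mirrored from spa_config_parse() is that a failed child leaves a NULL slot, so the unwind pass can free whatever subset of siblings was successfully built.
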
+ */ +if (spa->spa_root_vdev != NULL) { + spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); + for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) + vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]); + spa_config_exit(spa, SCL_ALL, spa); +} - if (spa->spa_mmp.mmp_thread) - mmp_thread_stop(spa); +if (spa->spa_mmp.mmp_thread) + mmp_thread_stop(spa); - /* - * Wait for any outstanding async I/O to complete. - */ - if (spa->spa_async_zio_root != NULL) { - for (int i = 0; i < max_ncpus; i++) - (void) zio_wait(spa->spa_async_zio_root[i]); - kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); - spa->spa_async_zio_root = NULL; - } +/* + * Wait for any outstanding async I/O to complete. + */ +if (spa->spa_async_zio_root != NULL) { + for (int i = 0; i < max_ncpus; i++) + (void) zio_wait(spa->spa_async_zio_root[i]); + kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); + spa->spa_async_zio_root = NULL; +} - /* XXX move to spa_destroy_aux_threads() once it's upstream */ - if (spa->spa_raidz_expand_zthr != NULL) { - zthr_destroy(spa->spa_raidz_expand_zthr); - spa->spa_raidz_expand_zthr = NULL; - } +/* XXX move to spa_destroy_aux_threads() once it's upstream */ +if (spa->spa_raidz_expand_zthr != NULL) { + zthr_destroy(spa->spa_raidz_expand_zthr); + spa->spa_raidz_expand_zthr = NULL; +} - if (spa->spa_vdev_removal != NULL) { - spa_vdev_removal_destroy(spa->spa_vdev_removal); - spa->spa_vdev_removal = NULL; - } +if (spa->spa_vdev_removal != NULL) { + spa_vdev_removal_destroy(spa->spa_vdev_removal); + spa->spa_vdev_removal = NULL; +} - if (spa->spa_condense_zthr != NULL) { - zthr_destroy(spa->spa_condense_zthr); - spa->spa_condense_zthr = NULL; - } +if (spa->spa_condense_zthr != NULL) { + zthr_destroy(spa->spa_condense_zthr); + spa->spa_condense_zthr = NULL; +} - if (spa->spa_checkpoint_discard_zthr != NULL) { - zthr_destroy(spa->spa_checkpoint_discard_zthr); - spa->spa_checkpoint_discard_zthr = NULL; - } +if (spa->spa_checkpoint_discard_zthr != NULL) { + zthr_destroy(spa->spa_checkpoint_discard_zthr); + spa->spa_checkpoint_discard_zthr = NULL; +} - spa_condense_fini(spa); +spa_condense_fini(spa); - bpobj_close(&spa->spa_deferred_bpobj); +bpobj_close(&spa->spa_deferred_bpobj); - spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); +spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); - /* - * Close all vdevs. - */ - if (spa->spa_root_vdev) - vdev_free(spa->spa_root_vdev); - ASSERT(spa->spa_root_vdev == NULL); +/* + * Close all vdevs. + */ +if (spa->spa_root_vdev) + vdev_free(spa->spa_root_vdev); +ASSERT(spa->spa_root_vdev == NULL); - /* - * Close the dsl pool. - */ - if (spa->spa_dsl_pool) { - dsl_pool_close(spa->spa_dsl_pool); - spa->spa_dsl_pool = NULL; - spa->spa_meta_objset = NULL; - } +/* + * Close the dsl pool. 
+ */ +if (spa->spa_dsl_pool) { + dsl_pool_close(spa->spa_dsl_pool); + spa->spa_dsl_pool = NULL; + spa->spa_meta_objset = NULL; +} - ddt_unload(spa); +ddt_unload(spa); - /* - * Drop and purge level 2 cache - */ - spa_l2cache_drop(spa); +/* + * Drop and purge level 2 cache + */ +spa_l2cache_drop(spa); - for (i = 0; i < spa->spa_spares.sav_count; i++) - vdev_free(spa->spa_spares.sav_vdevs[i]); - if (spa->spa_spares.sav_vdevs) { - kmem_free(spa->spa_spares.sav_vdevs, - spa->spa_spares.sav_count * sizeof (void *)); - spa->spa_spares.sav_vdevs = NULL; - } - if (spa->spa_spares.sav_config) { - nvlist_free(spa->spa_spares.sav_config); - spa->spa_spares.sav_config = NULL; - } - spa->spa_spares.sav_count = 0; +for (i = 0; i < spa->spa_spares.sav_count; i++) + vdev_free(spa->spa_spares.sav_vdevs[i]); +if (spa->spa_spares.sav_vdevs) { + kmem_free(spa->spa_spares.sav_vdevs, + spa->spa_spares.sav_count * sizeof (void *)); + spa->spa_spares.sav_vdevs = NULL; +} +if (spa->spa_spares.sav_config) { + nvlist_free(spa->spa_spares.sav_config); + spa->spa_spares.sav_config = NULL; +} +spa->spa_spares.sav_count = 0; - for (i = 0; i < spa->spa_l2cache.sav_count; i++) { - vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); - vdev_free(spa->spa_l2cache.sav_vdevs[i]); - } - if (spa->spa_l2cache.sav_vdevs) { - kmem_free(spa->spa_l2cache.sav_vdevs, - spa->spa_l2cache.sav_count * sizeof (void *)); - spa->spa_l2cache.sav_vdevs = NULL; - } - if (spa->spa_l2cache.sav_config) { - nvlist_free(spa->spa_l2cache.sav_config); - spa->spa_l2cache.sav_config = NULL; - } - spa->spa_l2cache.sav_count = 0; +for (i = 0; i < spa->spa_l2cache.sav_count; i++) { + vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); + vdev_free(spa->spa_l2cache.sav_vdevs[i]); +} +if (spa->spa_l2cache.sav_vdevs) { + kmem_free(spa->spa_l2cache.sav_vdevs, + spa->spa_l2cache.sav_count * sizeof (void *)); + spa->spa_l2cache.sav_vdevs = NULL; +} +if (spa->spa_l2cache.sav_config) { + nvlist_free(spa->spa_l2cache.sav_config); + spa->spa_l2cache.sav_config = NULL; +} +spa->spa_l2cache.sav_count = 0; - spa->spa_async_suspended = 0; +spa->spa_async_suspended = 0; - spa->spa_indirect_vdevs_loaded = B_FALSE; +spa->spa_indirect_vdevs_loaded = B_FALSE; - if (spa->spa_comment != NULL) { - spa_strfree(spa->spa_comment); - spa->spa_comment = NULL; - } +if (spa->spa_comment != NULL) { + spa_strfree(spa->spa_comment); + spa->spa_comment = NULL; +} - spa->spa_raidz_expand = NULL; +spa->spa_raidz_expand = NULL; - spa_config_exit(spa, SCL_ALL, spa); +spa_config_exit(spa, SCL_ALL, spa); } /* - * Load (or re-load) the current list of vdevs describing the active spares for - * this pool. When this is called, we have some form of basic information in - * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and - * then re-generate a more complete list including status information. - */ +* Load (or re-load) the current list of vdevs describing the active spares for +* this pool. When this is called, we have some form of basic information in +* 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and +* then re-generate a more complete list including status information. +*/ void spa_load_spares(spa_t *spa) { - nvlist_t **spares; - uint_t nspares; - int i; - vdev_t *vd, *tvd; +nvlist_t **spares; +uint_t nspares; +int i; +vdev_t *vd, *tvd; #ifndef _KERNEL - /* - * zdb opens both the current state of the pool and the - * checkpointed state (if present), with a different spa_t. 
- * - * As spare vdevs are shared among open pools, we skip loading - * them when we load the checkpointed state of the pool. - */ - if (!spa_writeable(spa)) - return; +/* + * zdb opens both the current state of the pool and the + * checkpointed state (if present), with a different spa_t. + * + * As spare vdevs are shared among open pools, we skip loading + * them when we load the checkpointed state of the pool. + */ +if (!spa_writeable(spa)) + return; #endif - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - - /* - * First, close and free any existing spare vdevs. - */ - for (i = 0; i < spa->spa_spares.sav_count; i++) { - vd = spa->spa_spares.sav_vdevs[i]; - - /* Undo the call to spa_activate() below */ - if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, - B_FALSE)) != NULL && tvd->vdev_isspare) - spa_spare_remove(tvd); - vdev_close(vd); - vdev_free(vd); - } - - if (spa->spa_spares.sav_vdevs) - kmem_free(spa->spa_spares.sav_vdevs, - spa->spa_spares.sav_count * sizeof (void *)); +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - if (spa->spa_spares.sav_config == NULL) - nspares = 0; - else - VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); +/* + * First, close and free any existing spare vdevs. + */ +for (i = 0; i < spa->spa_spares.sav_count; i++) { + vd = spa->spa_spares.sav_vdevs[i]; - spa->spa_spares.sav_count = (int)nspares; - spa->spa_spares.sav_vdevs = NULL; + /* Undo the call to spa_activate() below */ + if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, + B_FALSE)) != NULL && tvd->vdev_isspare) + spa_spare_remove(tvd); + vdev_close(vd); + vdev_free(vd); +} - if (nspares == 0) - return; +if (spa->spa_spares.sav_vdevs) + kmem_free(spa->spa_spares.sav_vdevs, + spa->spa_spares.sav_count * sizeof (void *)); - /* - * Construct the array of vdevs, opening them to get status in the - * process. For each spare, there is potentially two different vdev_t - * structures associated with it: one in the list of spares (used only - * for basic validation purposes) and one in the active vdev - * configuration (if it's spared in). During this phase we open and - * validate each vdev on the spare list. If the vdev also exists in the - * active configuration, then we also mark this vdev as an active spare. - */ - spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), - KM_SLEEP); - for (i = 0; i < spa->spa_spares.sav_count; i++) { - VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, - VDEV_ALLOC_SPARE) == 0); - ASSERT(vd != NULL); +if (spa->spa_spares.sav_config == NULL) + nspares = 0; +else + VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); - spa->spa_spares.sav_vdevs[i] = vd; +spa->spa_spares.sav_count = (int)nspares; +spa->spa_spares.sav_vdevs = NULL; - if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, - B_FALSE)) != NULL) { - if (!tvd->vdev_isspare) - spa_spare_add(tvd); +if (nspares == 0) + return; - /* - * We only mark the spare active if we were successfully - * able to load the vdev. Otherwise, importing a pool - * with a bad active spare would result in strange - * behavior, because multiple pool would think the spare - * is actively in use. - * - * There is a vulnerability here to an equally bizarre - * circumstance, where a dead active spare is later - * brought back to life (onlined or otherwise). Given - * the rarity of this scenario, and the extra complexity - * it adds, we ignore the possibility. 
- */
-			if (!vdev_is_dead(tvd))
-				spa_spare_activate(tvd);
-		}
+/*
+ * Construct the array of vdevs, opening them to get status in the
+ * process. For each spare, there are potentially two different vdev_t
+ * structures associated with it: one in the list of spares (used only
+ * for basic validation purposes) and one in the active vdev
+ * configuration (if it's spared in). During this phase we open and
+ * validate each vdev on the spare list. If the vdev also exists in the
+ * active configuration, then we also mark this vdev as an active spare.
+ */
+spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *),
+    KM_SLEEP);
+for (i = 0; i < spa->spa_spares.sav_count; i++) {
+	VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
+	    VDEV_ALLOC_SPARE) == 0);
+	ASSERT(vd != NULL);

-		vd->vdev_top = vd;
-		vd->vdev_aux = &spa->spa_spares;
+	spa->spa_spares.sav_vdevs[i] = vd;

-		if (vdev_open(vd) != 0)
-			continue;
+	if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
+	    B_FALSE)) != NULL) {
+		if (!tvd->vdev_isspare)
+			spa_spare_add(tvd);

-		if (vdev_validate_aux(vd) == 0)
-			spa_spare_add(vd);
+		/*
+		 * We only mark the spare active if we were successfully
+		 * able to load the vdev. Otherwise, importing a pool
+		 * with a bad active spare would result in strange
+		 * behavior, because multiple pools would think the spare
+		 * is actively in use.
+		 *
+		 * There is a vulnerability here to an equally bizarre
+		 * circumstance, where a dead active spare is later
+		 * brought back to life (onlined or otherwise). Given
+		 * the rarity of this scenario, and the extra complexity
+		 * it adds, we ignore the possibility.
+		 */
+		if (!vdev_is_dead(tvd))
+			spa_spare_activate(tvd);
 	}

-	/*
-	 * Recompute the stashed list of spares, with status information
-	 * this time.
-	 */
-	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
-	    DATA_TYPE_NVLIST_ARRAY) == 0);
+	vd->vdev_top = vd;
+	vd->vdev_aux = &spa->spa_spares;

-	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
-	    KM_SLEEP);
-	for (i = 0; i < spa->spa_spares.sav_count; i++)
-		spares[i] = vdev_config_generate(spa,
-		    spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE);
-	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
-	for (i = 0; i < spa->spa_spares.sav_count; i++)
-		nvlist_free(spares[i]);
-	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
+	if (vdev_open(vd) != 0)
+		continue;
+
+	if (vdev_validate_aux(vd) == 0)
+		spa_spare_add(vd);
 }

 /*
- * Load (or re-load) the current list of vdevs describing the active l2cache for
- * this pool. When this is called, we have some form of basic information in
- * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and
- * then re-generate a more complete list including status information.
- * Devices which are already active have their details maintained, and are
- * not re-opened.
+ * Recompute the stashed list of spares, with status information
+ * this time.
*/ +VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, + DATA_TYPE_NVLIST_ARRAY) == 0); + +spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), + KM_SLEEP); +for (i = 0; i < spa->spa_spares.sav_count; i++) + spares[i] = vdev_config_generate(spa, + spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); +VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); +for (i = 0; i < spa->spa_spares.sav_count; i++) + nvlist_free(spares[i]); +kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); +} + +/* +* Load (or re-load) the current list of vdevs describing the active l2cache for +* this pool. When this is called, we have some form of basic information in +* 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and +* then re-generate a more complete list including status information. +* Devices which are already active have their details maintained, and are +* not re-opened. +*/ void spa_load_l2cache(spa_t *spa) { - nvlist_t **l2cache = NULL; - uint_t nl2cache; - int i, j, oldnvdevs; - uint64_t guid; - vdev_t *vd, **oldvdevs, **newvdevs; - spa_aux_vdev_t *sav = &spa->spa_l2cache; +nvlist_t **l2cache = NULL; +uint_t nl2cache; +int i, j, oldnvdevs; +uint64_t guid; +vdev_t *vd, **oldvdevs, **newvdevs; +spa_aux_vdev_t *sav = &spa->spa_l2cache; #ifndef _KERNEL - /* - * zdb opens both the current state of the pool and the - * checkpointed state (if present), with a different spa_t. - * - * As L2 caches are part of the ARC which is shared among open - * pools, we skip loading them when we load the checkpointed - * state of the pool. - */ - if (!spa_writeable(spa)) - return; +/* + * zdb opens both the current state of the pool and the + * checkpointed state (if present), with a different spa_t. + * + * As L2 caches are part of the ARC which is shared among open + * pools, we skip loading them when we load the checkpointed + * state of the pool. + */ +if (!spa_writeable(spa)) + return; #endif - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - - oldvdevs = sav->sav_vdevs; - oldnvdevs = sav->sav_count; - sav->sav_vdevs = NULL; - sav->sav_count = 0; - - if (sav->sav_config == NULL) { - nl2cache = 0; - newvdevs = NULL; - goto out; - } +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, - ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); +oldvdevs = sav->sav_vdevs; +oldnvdevs = sav->sav_count; +sav->sav_vdevs = NULL; +sav->sav_count = 0; - /* - * Process new nvlist of vdevs. - */ - for (i = 0; i < nl2cache; i++) { - VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, - &guid) == 0); +if (sav->sav_config == NULL) { + nl2cache = 0; + newvdevs = NULL; + goto out; +} - newvdevs[i] = NULL; - for (j = 0; j < oldnvdevs; j++) { - vd = oldvdevs[j]; - if (vd != NULL && guid == vd->vdev_guid) { - /* - * Retain previous vdev for add/remove ops. - */ - newvdevs[i] = vd; - oldvdevs[j] = NULL; - break; - } - } +VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); +newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); - if (newvdevs[i] == NULL) { +/* + * Process new nvlist of vdevs. 
+ */ +for (i = 0; i < nl2cache; i++) { + VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, + &guid) == 0); + + newvdevs[i] = NULL; + for (j = 0; j < oldnvdevs; j++) { + vd = oldvdevs[j]; + if (vd != NULL && guid == vd->vdev_guid) { /* - * Create new vdev + * Retain previous vdev for add/remove ops. */ - VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, - VDEV_ALLOC_L2CACHE) == 0); - ASSERT(vd != NULL); newvdevs[i] = vd; + oldvdevs[j] = NULL; + break; + } + } - /* - * Commit this vdev as an l2cache device, - * even if it fails to open. - */ - spa_l2cache_add(vd); + if (newvdevs[i] == NULL) { + /* + * Create new vdev + */ + VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, + VDEV_ALLOC_L2CACHE) == 0); + ASSERT(vd != NULL); + newvdevs[i] = vd; - vd->vdev_top = vd; - vd->vdev_aux = sav; + /* + * Commit this vdev as an l2cache device, + * even if it fails to open. + */ + spa_l2cache_add(vd); - spa_l2cache_activate(vd); + vd->vdev_top = vd; + vd->vdev_aux = sav; - if (vdev_open(vd) != 0) - continue; + spa_l2cache_activate(vd); - (void) vdev_validate_aux(vd); + if (vdev_open(vd) != 0) + continue; - if (!vdev_is_dead(vd)) - l2arc_add_vdev(spa, vd); - } + (void) vdev_validate_aux(vd); + + if (!vdev_is_dead(vd)) + l2arc_add_vdev(spa, vd); } +} - sav->sav_vdevs = newvdevs; - sav->sav_count = (int)nl2cache; +sav->sav_vdevs = newvdevs; +sav->sav_count = (int)nl2cache; - /* - * Recompute the stashed list of l2cache devices, with status - * information this time. - */ - VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, - DATA_TYPE_NVLIST_ARRAY) == 0); +/* + * Recompute the stashed list of l2cache devices, with status + * information this time. + */ +VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, + DATA_TYPE_NVLIST_ARRAY) == 0); - if (sav->sav_count > 0) - l2cache = kmem_alloc(sav->sav_count * sizeof (void *), - KM_SLEEP); - for (i = 0; i < sav->sav_count; i++) - l2cache[i] = vdev_config_generate(spa, - sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); - VERIFY(nvlist_add_nvlist_array(sav->sav_config, - ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); +if (sav->sav_count > 0) + l2cache = kmem_alloc(sav->sav_count * sizeof (void *), + KM_SLEEP); +for (i = 0; i < sav->sav_count; i++) + l2cache[i] = vdev_config_generate(spa, + sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); +VERIFY(nvlist_add_nvlist_array(sav->sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); out: - /* - * Purge vdevs that were dropped - */ - for (i = 0; i < oldnvdevs; i++) { - uint64_t pool; - - vd = oldvdevs[i]; - if (vd != NULL) { - ASSERT(vd->vdev_isl2cache); - - if (spa_l2cache_exists(vd->vdev_guid, &pool) && - pool != 0ULL && l2arc_vdev_present(vd)) - l2arc_remove_vdev(vd); - vdev_clear_stats(vd); - vdev_free(vd); - } +/* + * Purge vdevs that were dropped + */ +for (i = 0; i < oldnvdevs; i++) { + uint64_t pool; + + vd = oldvdevs[i]; + if (vd != NULL) { + ASSERT(vd->vdev_isl2cache); + + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); + vdev_clear_stats(vd); + vdev_free(vd); } +} - if (oldvdevs) - kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); +if (oldvdevs) + kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); - for (i = 0; i < sav->sav_count; i++) - nvlist_free(l2cache[i]); - if (sav->sav_count) - kmem_free(l2cache, sav->sav_count * sizeof (void *)); +for (i = 0; i < sav->sav_count; i++) + nvlist_free(l2cache[i]); +if (sav->sav_count) + kmem_free(l2cache, sav->sav_count * sizeof (void *)); } static int 
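
A pattern spa_load_l2cache() relies on is reconciling the old vdev array against the new config by GUID: matching entries are carried over so device state survives add/remove operations, and whatever is left behind gets purged. A self-contained sketch of that reconciliation, with an illustrative struct and userland allocation (error handling elided):

```c
#include <stdlib.h>
#include <stdint.h>

typedef struct cdev {
	uint64_t cd_guid;
	/* ...per-device state worth preserving... */
} cdev_t;

/*
 * For each wanted GUID, carry a matching old device over (so its
 * state survives) or create a fresh one; anything still sitting in
 * the old array afterwards was dropped from the config and is freed.
 */
static void
reconcile(cdev_t **oldv, int oldn, const uint64_t *want, cdev_t **newv,
    int newn)
{
	for (int i = 0; i < newn; i++) {
		newv[i] = NULL;
		for (int j = 0; j < oldn; j++) {
			if (oldv[j] != NULL && oldv[j]->cd_guid == want[i]) {
				newv[i] = oldv[j];	/* retain across ops */
				oldv[j] = NULL;
				break;
			}
		}
		if (newv[i] == NULL) {			/* create new device */
			newv[i] = calloc(1, sizeof (cdev_t));
			newv[i]->cd_guid = want[i];
		}
	}
	for (int j = 0; j < oldn; j++)
		free(oldv[j]);		/* purge devices that were dropped */
}
```

Carrying the old pointer across, rather than recreating it, is what lets an already-active cache device keep its stats and open state when the config is rewritten.
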
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { - dmu_buf_t *db; - char *packed = NULL; - size_t nvsize = 0; - int error; - *value = NULL; +dmu_buf_t *db; +char *packed = NULL; +size_t nvsize = 0; +int error; +*value = NULL; - error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); - if (error) - return (error); +error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); +if (error) + return (error); - nvsize = *(uint64_t *)db->db_data; - dmu_buf_rele(db, FTAG); +nvsize = *(uint64_t *)db->db_data; +dmu_buf_rele(db, FTAG); - packed = vmem_alloc(nvsize, KM_SLEEP); - error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, - DMU_READ_PREFETCH); - if (error == 0) - error = nvlist_unpack(packed, nvsize, value, 0); - vmem_free(packed, nvsize); +packed = vmem_alloc(nvsize, KM_SLEEP); +error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, + DMU_READ_PREFETCH); +if (error == 0) + error = nvlist_unpack(packed, nvsize, value, 0); +vmem_free(packed, nvsize); - return (error); +return (error); } /* - * Concrete top-level vdevs that are not missing and are not logs. At every - * spa_sync we write new uberblocks to at least SPA_SYNC_MIN_VDEVS core tvds. - */ +* Concrete top-level vdevs that are not missing and are not logs. At every +* spa_sync we write new uberblocks to at least SPA_SYNC_MIN_VDEVS core tvds. +*/ static uint64_t spa_healthy_core_tvds(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; - uint64_t tvds = 0; +vdev_t *rvd = spa->spa_root_vdev; +uint64_t tvds = 0; - for (uint64_t i = 0; i < rvd->vdev_children; i++) { - vdev_t *vd = rvd->vdev_child[i]; - if (vd->vdev_islog) - continue; - if (vdev_is_concrete(vd) && !vdev_is_dead(vd)) - tvds++; - } +for (uint64_t i = 0; i < rvd->vdev_children; i++) { + vdev_t *vd = rvd->vdev_child[i]; + if (vd->vdev_islog) + continue; + if (vdev_is_concrete(vd) && !vdev_is_dead(vd)) + tvds++; +} - return (tvds); +return (tvds); } /* - * Checks to see if the given vdev could not be opened, in which case we post a - * sysevent to notify the autoreplace code that the device has been removed. - */ +* Checks to see if the given vdev could not be opened, in which case we post a +* sysevent to notify the autoreplace code that the device has been removed. +*/ static void spa_check_removed(vdev_t *vd) { - for (uint64_t c = 0; c < vd->vdev_children; c++) - spa_check_removed(vd->vdev_child[c]); +for (uint64_t c = 0; c < vd->vdev_children; c++) + spa_check_removed(vd->vdev_child[c]); - if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && - vdev_is_concrete(vd)) { - zfs_post_autoreplace(vd->vdev_spa, vd); - spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK); - } +if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && + vdev_is_concrete(vd)) { + zfs_post_autoreplace(vd->vdev_spa, vd); + spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK); +} } static int spa_check_for_missing_logs(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; +vdev_t *rvd = spa->spa_root_vdev; - /* - * If we're doing a normal import, then build up any additional - * diagnostic information about missing log devices. - * We'll pass this up to the user for further processing. - */ - if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { - nvlist_t **child, *nv; - uint64_t idx = 0; +/* + * If we're doing a normal import, then build up any additional + * diagnostic information about missing log devices. + * We'll pass this up to the user for further processing. 
+ */ +if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { + nvlist_t **child, *nv; + uint64_t idx = 0; - child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *), - KM_SLEEP); - VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *), + KM_SLEEP); + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); - for (uint64_t c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; + for (uint64_t c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; - /* - * We consider a device as missing only if it failed - * to open (i.e. offline or faulted is not considered - * as missing). - */ - if (tvd->vdev_islog && - tvd->vdev_state == VDEV_STATE_CANT_OPEN) { - child[idx++] = vdev_config_generate(spa, tvd, - B_FALSE, VDEV_CONFIG_MISSING); - } + /* + * We consider a device as missing only if it failed + * to open (i.e. offline or faulted is not considered + * as missing). + */ + if (tvd->vdev_islog && + tvd->vdev_state == VDEV_STATE_CANT_OPEN) { + child[idx++] = vdev_config_generate(spa, tvd, + B_FALSE, VDEV_CONFIG_MISSING); } + } + + if (idx > 0) { + fnvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, idx); + fnvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_MISSING_DEVICES, nv); - if (idx > 0) { - fnvlist_add_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, child, idx); - fnvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_MISSING_DEVICES, nv); + for (uint64_t i = 0; i < idx; i++) + nvlist_free(child[i]); + } + nvlist_free(nv); + kmem_free(child, rvd->vdev_children * sizeof (char **)); - for (uint64_t i = 0; i < idx; i++) - nvlist_free(child[i]); - } - nvlist_free(nv); - kmem_free(child, rvd->vdev_children * sizeof (char **)); + if (idx > 0) { + spa_load_failed(spa, "some log devices are missing"); + vdev_dbgmsg_print_tree(rvd, 2); + return (SET_ERROR(ENXIO)); + } +} else { + for (uint64_t c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; - if (idx > 0) { - spa_load_failed(spa, "some log devices are missing"); + if (tvd->vdev_islog && + tvd->vdev_state == VDEV_STATE_CANT_OPEN) { + spa_set_log_state(spa, SPA_LOG_CLEAR); + spa_load_note(spa, "some log devices are " + "missing, ZIL is dropped."); vdev_dbgmsg_print_tree(rvd, 2); - return (SET_ERROR(ENXIO)); - } - } else { - for (uint64_t c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - - if (tvd->vdev_islog && - tvd->vdev_state == VDEV_STATE_CANT_OPEN) { - spa_set_log_state(spa, SPA_LOG_CLEAR); - spa_load_note(spa, "some log devices are " - "missing, ZIL is dropped."); - vdev_dbgmsg_print_tree(rvd, 2); - break; - } + break; } } +} - return (0); +return (0); } /* - * Check for missing log devices - */ +* Check for missing log devices +*/ static boolean_t spa_check_logs(spa_t *spa) { - boolean_t rv = B_FALSE; - dsl_pool_t *dp = spa_get_dsl(spa); +boolean_t rv = B_FALSE; +dsl_pool_t *dp = spa_get_dsl(spa); - switch (spa->spa_log_state) { - default: - break; - case SPA_LOG_MISSING: - /* need to recheck in case slog has been restored */ - case SPA_LOG_UNKNOWN: - rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, - zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0); - if (rv) - spa_set_log_state(spa, SPA_LOG_MISSING); - break; - } - return (rv); +switch (spa->spa_log_state) { +default: + break; +case SPA_LOG_MISSING: + /* need to recheck in case slog has been restored */ +case SPA_LOG_UNKNOWN: + rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 
0); + if (rv) + spa_set_log_state(spa, SPA_LOG_MISSING); + break; +} +return (rv); } static boolean_t spa_passivate_log(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; - boolean_t slog_found = B_FALSE; +vdev_t *rvd = spa->spa_root_vdev; +boolean_t slog_found = B_FALSE; - ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); +ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); - if (!spa_has_slogs(spa)) - return (B_FALSE); +if (!spa_has_slogs(spa)) + return (B_FALSE); - for (int c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - metaslab_group_t *mg = tvd->vdev_mg; +for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; - if (tvd->vdev_islog) { - metaslab_group_passivate(mg); - slog_found = B_TRUE; - } + if (tvd->vdev_islog) { + metaslab_group_passivate(mg); + slog_found = B_TRUE; } +} - return (slog_found); +return (slog_found); } static void spa_activate_log(spa_t *spa) { - vdev_t *rvd = spa->spa_root_vdev; +vdev_t *rvd = spa->spa_root_vdev; - ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); +ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); - for (int c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - metaslab_group_t *mg = tvd->vdev_mg; +for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; - if (tvd->vdev_islog) - metaslab_group_activate(mg); - } + if (tvd->vdev_islog) + metaslab_group_activate(mg); +} } int spa_reset_logs(spa_t *spa) { - int error; +int error; - error = dmu_objset_find(spa_name(spa), zil_reset, - NULL, DS_FIND_CHILDREN); - if (error == 0) { - /* - * We successfully offlined the log device, sync out the - * current txg so that the "stubby" block can be removed - * by zil_sync(). - */ - txg_wait_synced(spa->spa_dsl_pool, 0); - } - return (error); +error = dmu_objset_find(spa_name(spa), zil_reset, + NULL, DS_FIND_CHILDREN); +if (error == 0) { + /* + * We successfully offlined the log device, sync out the + * current txg so that the "stubby" block can be removed + * by zil_sync(). 
+ */ + txg_wait_synced(spa->spa_dsl_pool, 0); +} +return (error); } static void spa_aux_check_removed(spa_aux_vdev_t *sav) { - for (int i = 0; i < sav->sav_count; i++) - spa_check_removed(sav->sav_vdevs[i]); +for (int i = 0; i < sav->sav_count; i++) + spa_check_removed(sav->sav_vdevs[i]); } void spa_claim_notify(zio_t *zio) { - spa_t *spa = zio->io_spa; +spa_t *spa = zio->io_spa; - if (zio->io_error) - return; +if (zio->io_error) + return; - mutex_enter(&spa->spa_props_lock); /* any mutex will do */ - if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) - spa->spa_claim_max_txg = zio->io_bp->blk_birth; - mutex_exit(&spa->spa_props_lock); +mutex_enter(&spa->spa_props_lock); /* any mutex will do */ +if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) + spa->spa_claim_max_txg = zio->io_bp->blk_birth; +mutex_exit(&spa->spa_props_lock); } typedef struct spa_load_error { - uint64_t sle_meta_count; - uint64_t sle_data_count; +uint64_t sle_meta_count; +uint64_t sle_data_count; } spa_load_error_t; static void spa_load_verify_done(zio_t *zio) { - blkptr_t *bp = zio->io_bp; - spa_load_error_t *sle = zio->io_private; - dmu_object_type_t type = BP_GET_TYPE(bp); - int error = zio->io_error; - spa_t *spa = zio->io_spa; - - abd_free(zio->io_abd); - if (error) { - if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && - type != DMU_OT_INTENT_LOG) - atomic_inc_64(&sle->sle_meta_count); - else - atomic_inc_64(&sle->sle_data_count); - } +blkptr_t *bp = zio->io_bp; +spa_load_error_t *sle = zio->io_private; +dmu_object_type_t type = BP_GET_TYPE(bp); +int error = zio->io_error; +spa_t *spa = zio->io_spa; + +abd_free(zio->io_abd); +if (error) { + if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && + type != DMU_OT_INTENT_LOG) + atomic_inc_64(&sle->sle_meta_count); + else + atomic_inc_64(&sle->sle_data_count); +} - mutex_enter(&spa->spa_scrub_lock); - spa->spa_load_verify_ios--; - cv_broadcast(&spa->spa_scrub_io_cv); - mutex_exit(&spa->spa_scrub_lock); +mutex_enter(&spa->spa_scrub_lock); +spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); +cv_broadcast(&spa->spa_scrub_io_cv); +mutex_exit(&spa->spa_scrub_lock); } /* - * Maximum number of concurrent scrub i/os to create while verifying - * a pool while importing it. - */ -int spa_load_verify_maxinflight = 10000; +* Maximum number of inflight bytes is the log2 fraction of the arc size. +* By default, we set it to 1/16th of the arc. +*/ +int spa_load_verify_shift = 4; int spa_load_verify_metadata = B_TRUE; int spa_load_verify_data = B_TRUE; /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) +const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { - if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) - return (0); - /* - * Note: normally this routine will not be called if - * spa_load_verify_metadata is not set. However, it may be useful - * to manually set the flag after the traversal has begun. - */ - if (!spa_load_verify_metadata) - return (0); - if (!BP_IS_METADATA(bp) && !spa_load_verify_data) - return (0); +if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + return (0); +/* + * Note: normally this routine will not be called if + * spa_load_verify_metadata is not set. However, it may be useful + * to manually set the flag after the traversal has begun. 
+ */ +if (!spa_load_verify_metadata) + return (0); +if (!BP_IS_METADATA(bp) && !spa_load_verify_data) + return (0); - zio_t *rio = arg; - size_t size = BP_GET_PSIZE(bp); +uint64_t maxinflight_bytes = + arc_target_bytes() >> spa_load_verify_shift; +zio_t *rio = arg; +size_t size = BP_GET_PSIZE(bp); - mutex_enter(&spa->spa_scrub_lock); - while (spa->spa_load_verify_ios >= spa_load_verify_maxinflight) - cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); - spa->spa_load_verify_ios++; - mutex_exit(&spa->spa_scrub_lock); +mutex_enter(&spa->spa_scrub_lock); +while (spa->spa_load_verify_bytes >= maxinflight_bytes) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); +spa->spa_load_verify_bytes += size; +mutex_exit(&spa->spa_scrub_lock); - zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, - spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | - ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); - return (0); +zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, + spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); +return (0); } /* ARGSUSED */ int verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { - if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) - return (SET_ERROR(ENAMETOOLONG)); +if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); - return (0); +return (0); } static int spa_load_verify(spa_t *spa) { - zio_t *rio; - spa_load_error_t sle = { 0 }; - zpool_load_policy_t policy; - boolean_t verify_ok = B_FALSE; - int error = 0; +zio_t *rio; +spa_load_error_t sle = { 0 }; +zpool_load_policy_t policy; +boolean_t verify_ok = B_FALSE; +int error = 0; - zpool_get_load_policy(spa->spa_config, &policy); +zpool_get_load_policy(spa->spa_config, &policy); - if (policy.zlp_rewind & ZPOOL_NEVER_REWIND) - return (0); +if (policy.zlp_rewind & ZPOOL_NEVER_REWIND) + return (0); - dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); - error = dmu_objset_find_dp(spa->spa_dsl_pool, - spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, - DS_FIND_CHILDREN); - dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); - if (error != 0) - return (error); +dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); +error = dmu_objset_find_dp(spa->spa_dsl_pool, + spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, + DS_FIND_CHILDREN); +dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); +if (error != 0) + return (error); - rio = zio_root(spa, NULL, &sle, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); +rio = zio_root(spa, NULL, &sle, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); - if (spa_load_verify_metadata) { - if (spa->spa_extreme_rewind) { - spa_load_note(spa, "performing a complete scan of the " - "pool since extreme rewind is on. This may take " - "a very long time.\n (spa_load_verify_data=%u, " - "spa_load_verify_metadata=%u)", - spa_load_verify_data, spa_load_verify_metadata); - } - error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | - TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); +if (spa_load_verify_metadata) { + if (spa->spa_extreme_rewind) { + spa_load_note(spa, "performing a complete scan of the " + "pool since extreme rewind is on. 
This may take " + "a very long time.\n (spa_load_verify_data=%u, " + "spa_load_verify_metadata=%u)", + spa_load_verify_data, spa_load_verify_metadata); } - (void) zio_wait(rio); + error = traverse_pool(spa, spa->spa_verify_min_txg, + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); +} - spa->spa_load_meta_errors = sle.sle_meta_count; - spa->spa_load_data_errors = sle.sle_data_count; +(void) zio_wait(rio); +ASSERT0(spa->spa_load_verify_bytes); - if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { - spa_load_note(spa, "spa_load_verify found %llu metadata errors " - "and %llu data errors", (u_longlong_t)sle.sle_meta_count, - (u_longlong_t)sle.sle_data_count); - } +spa->spa_load_meta_errors = sle.sle_meta_count; +spa->spa_load_data_errors = sle.sle_data_count; - if (spa_load_verify_dryrun || - (!error && sle.sle_meta_count <= policy.zlp_maxmeta && - sle.sle_data_count <= policy.zlp_maxdata)) { - int64_t loss = 0; +if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { + spa_load_note(spa, "spa_load_verify found %llu metadata errors " + "and %llu data errors", (u_longlong_t)sle.sle_meta_count, + (u_longlong_t)sle.sle_data_count); +} - verify_ok = B_TRUE; - spa->spa_load_txg = spa->spa_uberblock.ub_txg; - spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; +if (spa_load_verify_dryrun || + (!error && sle.sle_meta_count <= policy.zlp_maxmeta && + sle.sle_data_count <= policy.zlp_maxdata)) { + int64_t loss = 0; - loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; - VERIFY(nvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); - VERIFY(nvlist_add_int64(spa->spa_load_info, - ZPOOL_CONFIG_REWIND_TIME, loss) == 0); - VERIFY(nvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); - } else { - spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; - } + verify_ok = B_TRUE; + spa->spa_load_txg = spa->spa_uberblock.ub_txg; + spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; - if (spa_load_verify_dryrun) - return (0); + loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; + VERIFY(nvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); + VERIFY(nvlist_add_int64(spa->spa_load_info, + ZPOOL_CONFIG_REWIND_TIME, loss) == 0); + VERIFY(nvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); +} else { + spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; +} - if (error) { - if (error != ENXIO && error != EIO) - error = SET_ERROR(EIO); - return (error); - } +if (spa_load_verify_dryrun) + return (0); + +if (error) { + if (error != ENXIO && error != EIO) + error = SET_ERROR(EIO); + return (error); +} - return (verify_ok ? 0 : EIO); +return (verify_ok ? 0 : EIO); } /* - * Find a value in the pool props object. - */ +* Find a value in the pool props object. +*/ static void spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) { - (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, - zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); +(void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, + zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); } /* - * Find a value in the pool directory object. - */ +* Find a value in the pool directory object. 
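
The replacement of spa_load_verify_maxinflight with a byte budget above caps verification traffic at arc_target_bytes() >> spa_load_verify_shift. The control flow is a classic condvar-guarded budget; here is a userland model using pthreads, where arc_target and verify_shift are stand-in values rather than the kernel tunables:

```c
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t scrub_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t scrub_io_cv = PTHREAD_COND_INITIALIZER;
static uint64_t inflight_bytes;

/* Stand-ins for arc_target_bytes() and spa_load_verify_shift. */
static uint64_t arc_target = 1ULL << 30;	/* pretend a 1 GiB ARC target */
static int verify_shift = 4;			/* budget = ARC / 16 */

/* Block until the read fits under the byte budget, then charge it. */
static void
verify_io_begin(uint64_t size)
{
	uint64_t max_bytes = arc_target >> verify_shift;

	pthread_mutex_lock(&scrub_lock);
	while (inflight_bytes >= max_bytes)
		pthread_cond_wait(&scrub_io_cv, &scrub_lock);
	inflight_bytes += size;
	pthread_mutex_unlock(&scrub_lock);
}

/* From the completion path: refund the bytes and wake any waiters. */
static void
verify_io_done(uint64_t size)
{
	pthread_mutex_lock(&scrub_lock);
	inflight_bytes -= size;
	pthread_cond_broadcast(&scrub_io_cv);
	pthread_mutex_unlock(&scrub_lock);
}
```

Throttling on bytes rather than I/O count appears to be the point of the change: 10,000 inflight large blocks and 10,000 inflight 4 KiB blocks pin very different amounts of memory, while a fraction of the ARC stays bounded either way.
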
+*/ static int spa_dir_prop(spa_t *spa, const char *name, uint64_t *val, boolean_t log_enoent) { - int error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - name, sizeof (uint64_t), 1, val); +int error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + name, sizeof (uint64_t), 1, val); - if (error != 0 && (error != ENOENT || log_enoent)) { - spa_load_failed(spa, "couldn't get '%s' value in MOS directory " - "[error=%d]", name, error); - } +if (error != 0 && (error != ENOENT || log_enoent)) { + spa_load_failed(spa, "couldn't get '%s' value in MOS directory " + "[error=%d]", name, error); +} - return (error); +return (error); } static int spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) { - vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); - return (SET_ERROR(err)); +vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); +return (SET_ERROR(err)); } static void spa_spawn_aux_threads(spa_t *spa) { - ASSERT(spa_writeable(spa)); +ASSERT(spa_writeable(spa)); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa_start_raidz_expansion_thread(spa); - spa_start_indirect_condensing_thread(spa); +spa_start_raidz_expansion_thread(spa); +spa_start_indirect_condensing_thread(spa); - ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); - spa->spa_checkpoint_discard_zthr = - zthr_create(spa_checkpoint_discard_thread_check, - spa_checkpoint_discard_thread, spa); +ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); +spa->spa_checkpoint_discard_zthr = + zthr_create(spa_checkpoint_discard_thread_check, + spa_checkpoint_discard_thread, spa); } /* - * Fix up config after a partly-completed split. This is done with the - * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off - * pool have that entry in their config, but only the splitting one contains - * a list of all the guids of the vdevs that are being split off. - * - * This function determines what to do with that list: either rejoin - * all the disks to the pool, or complete the splitting process. To attempt - * the rejoin, each disk that is offlined is marked online again, and - * we do a reopen() call. If the vdev label for every disk that was - * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) - * then we call vdev_split() on each disk, and complete the split. - * - * Otherwise we leave the config alone, with all the vdevs in place in - * the original pool. - */ +* Fix up config after a partly-completed split. This is done with the +* ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off +* pool have that entry in their config, but only the splitting one contains +* a list of all the guids of the vdevs that are being split off. +* +* This function determines what to do with that list: either rejoin +* all the disks to the pool, or complete the splitting process. To attempt +* the rejoin, each disk that is offlined is marked online again, and +* we do a reopen() call. If the vdev label for every disk that was +* marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) +* then we call vdev_split() on each disk, and complete the split. +* +* Otherwise we leave the config alone, with all the vdevs in place in +* the original pool. 
+*/ static void spa_try_repair(spa_t *spa, nvlist_t *config) { - uint_t extracted; - uint64_t *glist; - uint_t i, gcount; - nvlist_t *nvl; - vdev_t **vd; - boolean_t attempt_reopen; +uint_t extracted; +uint64_t *glist; +uint_t i, gcount; +nvlist_t *nvl; +vdev_t **vd; +boolean_t attempt_reopen; - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) - return; +if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) + return; - /* check that the config is complete */ - if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, - &glist, &gcount) != 0) - return; +/* check that the config is complete */ +if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, + &glist, &gcount) != 0) + return; - vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); +vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); - /* attempt to online all the vdevs & validate */ - attempt_reopen = B_TRUE; - for (i = 0; i < gcount; i++) { - if (glist[i] == 0) /* vdev is hole */ - continue; +/* attempt to online all the vdevs & validate */ +attempt_reopen = B_TRUE; +for (i = 0; i < gcount; i++) { + if (glist[i] == 0) /* vdev is hole */ + continue; - vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); - if (vd[i] == NULL) { - /* - * Don't bother attempting to reopen the disks; - * just do the split. - */ - attempt_reopen = B_FALSE; - } else { - /* attempt to re-online it */ - vd[i]->vdev_offline = B_FALSE; - } + vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); + if (vd[i] == NULL) { + /* + * Don't bother attempting to reopen the disks; + * just do the split. + */ + attempt_reopen = B_FALSE; + } else { + /* attempt to re-online it */ + vd[i]->vdev_offline = B_FALSE; } +} - if (attempt_reopen) { - vdev_reopen(spa->spa_root_vdev); +if (attempt_reopen) { + vdev_reopen(spa->spa_root_vdev); - /* check each device to see what state it's in */ - for (extracted = 0, i = 0; i < gcount; i++) { - if (vd[i] != NULL && - vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) - break; - ++extracted; - } + /* check each device to see what state it's in */ + for (extracted = 0, i = 0; i < gcount; i++) { + if (vd[i] != NULL && + vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) + break; + ++extracted; } +} - /* - * If every disk has been moved to the new pool, or if we never - * even attempted to look at them, then we split them off for - * good. - */ - if (!attempt_reopen || gcount == extracted) { - for (i = 0; i < gcount; i++) - if (vd[i] != NULL) - vdev_split(vd[i]); - vdev_reopen(spa->spa_root_vdev); - } +/* + * If every disk has been moved to the new pool, or if we never + * even attempted to look at them, then we split them off for + * good. 
+ */ +if (!attempt_reopen || gcount == extracted) { + for (i = 0; i < gcount; i++) + if (vd[i] != NULL) + vdev_split(vd[i]); + vdev_reopen(spa->spa_root_vdev); +} - kmem_free(vd, gcount * sizeof (vdev_t *)); +kmem_free(vd, gcount * sizeof (vdev_t *)); } static int spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) { - char *ereport = FM_EREPORT_ZFS_POOL; - int error; +char *ereport = FM_EREPORT_ZFS_POOL; +int error; - spa->spa_load_state = state; - (void) spa_import_progress_set_state(spa_guid(spa), - spa_load_state(spa)); +spa->spa_load_state = state; +(void) spa_import_progress_set_state(spa_guid(spa), + spa_load_state(spa)); - gethrestime(&spa->spa_loaded_ts); - error = spa_load_impl(spa, type, &ereport); +gethrestime(&spa->spa_loaded_ts); +error = spa_load_impl(spa, type, &ereport); - /* - * Don't count references from objsets that are already closed - * and are making their way through the eviction process. - */ - spa_evicting_os_wait(spa); - spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); - if (error) { - if (error != EEXIST) { - spa->spa_loaded_ts.tv_sec = 0; - spa->spa_loaded_ts.tv_nsec = 0; - } - if (error != EBADF) { - zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0); - } +/* + * Don't count references from objsets that are already closed + * and are making their way through the eviction process. + */ +spa_evicting_os_wait(spa); +spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); +if (error) { + if (error != EEXIST) { + spa->spa_loaded_ts.tv_sec = 0; + spa->spa_loaded_ts.tv_nsec = 0; } - spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; - spa->spa_ena = 0; + if (error != EBADF) { + zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0); + } +} +spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; +spa->spa_ena = 0; - (void) spa_import_progress_set_state(spa_guid(spa), - spa_load_state(spa)); +(void) spa_import_progress_set_state(spa_guid(spa), + spa_load_state(spa)); - return (error); +return (error); } #ifdef ZFS_DEBUG /* - * Count the number of per-vdev ZAPs associated with all of the vdevs in the - * vdev tree rooted in the given vd, and ensure that each ZAP is present in the - * spa's per-vdev ZAP list. - */ +* Count the number of per-vdev ZAPs associated with all of the vdevs in the +* vdev tree rooted in the given vd, and ensure that each ZAP is present in the +* spa's per-vdev ZAP list. +*/ static uint64_t vdev_count_verify_zaps(vdev_t *vd) { - spa_t *spa = vd->vdev_spa; - uint64_t total = 0; +spa_t *spa = vd->vdev_spa; +uint64_t total = 0; - if (vd->vdev_top_zap != 0) { - total++; - ASSERT0(zap_lookup_int(spa->spa_meta_objset, - spa->spa_all_vdev_zaps, vd->vdev_top_zap)); - } - if (vd->vdev_leaf_zap != 0) { - total++; - ASSERT0(zap_lookup_int(spa->spa_meta_objset, - spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); - } +if (vd->vdev_top_zap != 0) { + total++; + ASSERT0(zap_lookup_int(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, vd->vdev_top_zap)); +} +if (vd->vdev_leaf_zap != 0) { + total++; + ASSERT0(zap_lookup_int(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); +} - for (uint64_t i = 0; i < vd->vdev_children; i++) { - total += vdev_count_verify_zaps(vd->vdev_child[i]); - } +for (uint64_t i = 0; i < vd->vdev_children; i++) { + total += vdev_count_verify_zaps(vd->vdev_child[i]); +} - return (total); +return (total); } #endif /* - * Determine whether the activity check is required. - */ +* Determine whether the activity check is required. 
+*/ static boolean_t spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label, - nvlist_t *config) +nvlist_t *config) { - uint64_t state = 0; - uint64_t hostid = 0; - uint64_t tryconfig_txg = 0; - uint64_t tryconfig_timestamp = 0; - uint16_t tryconfig_mmp_seq = 0; - nvlist_t *nvinfo; +uint64_t state = 0; +uint64_t hostid = 0; +uint64_t tryconfig_txg = 0; +uint64_t tryconfig_timestamp = 0; +uint16_t tryconfig_mmp_seq = 0; +nvlist_t *nvinfo; - if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { - nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); - (void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG, - &tryconfig_txg); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP, - &tryconfig_timestamp); - (void) nvlist_lookup_uint16(nvinfo, ZPOOL_CONFIG_MMP_SEQ, - &tryconfig_mmp_seq); - } +if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + (void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG, + &tryconfig_txg); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP, + &tryconfig_timestamp); + (void) nvlist_lookup_uint16(nvinfo, ZPOOL_CONFIG_MMP_SEQ, + &tryconfig_mmp_seq); +} - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state); +(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state); - /* - * Disable the MMP activity check - This is used by zdb which - * is intended to be used on potentially active pools. - */ - if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) - return (B_FALSE); +/* + * Disable the MMP activity check - This is used by zdb which + * is intended to be used on potentially active pools. + */ +if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) + return (B_FALSE); - /* - * Skip the activity check when the MMP feature is disabled. - */ - if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0) - return (B_FALSE); +/* + * Skip the activity check when the MMP feature is disabled. + */ +if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0) + return (B_FALSE); - /* - * If the tryconfig_ values are nonzero, they are the results of an - * earlier tryimport. If they all match the uberblock we just found, - * then the pool has not changed and we return false so we do not test - * a second time. - */ - if (tryconfig_txg && tryconfig_txg == ub->ub_txg && - tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp && - tryconfig_mmp_seq && tryconfig_mmp_seq == - (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) - return (B_FALSE); +/* + * If the tryconfig_ values are nonzero, they are the results of an + * earlier tryimport. If they all match the uberblock we just found, + * then the pool has not changed and we return false so we do not test + * a second time. + */ +if (tryconfig_txg && tryconfig_txg == ub->ub_txg && + tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp && + tryconfig_mmp_seq && tryconfig_mmp_seq == + (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) + return (B_FALSE); - /* - * Allow the activity check to be skipped when importing the pool - * on the same host which last imported it. Since the hostid from - * configuration may be stale use the one read from the label. - */ - if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) - hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); +/* + * Allow the activity check to be skipped when importing the pool + * on the same host which last imported it. Since the hostid from + * configuration may be stale use the one read from the label. 
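
spa_activity_check_required() above is essentially a sequence of early-out skips. Condensed into a pure predicate, with plain parameters in place of the nvlist lookups and the tryconfig short-circuit omitted for brevity:

```c
#include <stdbool.h>
#include <stdint.h>

static bool
activity_check_required(bool import_skip_mmp, bool mmp_valid,
    uint64_t mmp_delay, uint64_t label_hostid, uint64_t our_hostid,
    bool pool_state_active)
{
	if (import_skip_mmp)		/* zdb examines live pools on purpose */
		return (false);
	if (mmp_valid && mmp_delay == 0)	/* multihost disabled */
		return (false);
	if (label_hostid == our_hostid)		/* last imported on this host */
		return (false);
	if (!pool_state_active)			/* cleanly exported */
		return (false);
	return (true);
}
```

Only when every skip condition fails does the import pay for the full on-disk activity test below.
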
+ */ +if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) + hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); - if (hostid == spa_get_hostid()) - return (B_FALSE); +if (hostid == spa_get_hostid(spa)) + return (B_FALSE); - /* - * Skip the activity test when the pool was cleanly exported. - */ - if (state != POOL_STATE_ACTIVE) - return (B_FALSE); +/* + * Skip the activity test when the pool was cleanly exported. + */ +if (state != POOL_STATE_ACTIVE) + return (B_FALSE); - return (B_TRUE); +return (B_TRUE); } /* - * Nanoseconds the activity check must watch for changes on-disk. - */ +* Nanoseconds the activity check must watch for changes on-disk. +*/ static uint64_t spa_activity_check_duration(spa_t *spa, uberblock_t *ub) { - uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1); - uint64_t multihost_interval = MSEC2NSEC( - MMP_INTERVAL_OK(zfs_multihost_interval)); - uint64_t import_delay = MAX(NANOSEC, import_intervals * - multihost_interval); +uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1); +uint64_t multihost_interval = MSEC2NSEC( + MMP_INTERVAL_OK(zfs_multihost_interval)); +uint64_t import_delay = MAX(NANOSEC, import_intervals * + multihost_interval); - /* - * Local tunables determine a minimum duration except for the case - * where we know when the remote host will suspend the pool if MMP - * writes do not land. - * - * See Big Theory comment at the top of mmp.c for the reasoning behind - * these cases and times. - */ +/* + * Local tunables determine a minimum duration except for the case + * where we know when the remote host will suspend the pool if MMP + * writes do not land. + * + * See Big Theory comment at the top of mmp.c for the reasoning behind + * these cases and times. + */ - ASSERT(MMP_IMPORT_SAFETY_FACTOR >= 100); +ASSERT(MMP_IMPORT_SAFETY_FACTOR >= 100); - if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && - MMP_FAIL_INT(ub) > 0) { +if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && + MMP_FAIL_INT(ub) > 0) { - /* MMP on remote host will suspend pool after failed writes */ - import_delay = MMP_FAIL_INT(ub) * MSEC2NSEC(MMP_INTERVAL(ub)) * - MMP_IMPORT_SAFETY_FACTOR / 100; + /* MMP on remote host will suspend pool after failed writes */ + import_delay = MMP_FAIL_INT(ub) * MSEC2NSEC(MMP_INTERVAL(ub)) * + MMP_IMPORT_SAFETY_FACTOR / 100; - zfs_dbgmsg("fail_intvals>0 import_delay=%llu ub_mmp " - "mmp_fails=%llu ub_mmp mmp_interval=%llu " - "import_intervals=%u", import_delay, MMP_FAIL_INT(ub), - MMP_INTERVAL(ub), import_intervals); + zfs_dbgmsg("fail_intvals>0 import_delay=%llu ub_mmp " + "mmp_fails=%llu ub_mmp mmp_interval=%llu " + "import_intervals=%u", import_delay, MMP_FAIL_INT(ub), + MMP_INTERVAL(ub), import_intervals); - } else if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && - MMP_FAIL_INT(ub) == 0) { +} else if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) && + MMP_FAIL_INT(ub) == 0) { - /* MMP on remote host will never suspend pool */ - import_delay = MAX(import_delay, (MSEC2NSEC(MMP_INTERVAL(ub)) + - ub->ub_mmp_delay) * import_intervals); + /* MMP on remote host will never suspend pool */ + import_delay = MAX(import_delay, (MSEC2NSEC(MMP_INTERVAL(ub)) + + ub->ub_mmp_delay) * import_intervals); - zfs_dbgmsg("fail_intvals=0 import_delay=%llu ub_mmp " - "mmp_interval=%llu ub_mmp_delay=%llu " - "import_intervals=%u", import_delay, MMP_INTERVAL(ub), - ub->ub_mmp_delay, import_intervals); + zfs_dbgmsg("fail_intvals=0 import_delay=%llu ub_mmp " + "mmp_interval=%llu ub_mmp_delay=%llu " + "import_intervals=%u", 
import_delay, MMP_INTERVAL(ub), + ub->ub_mmp_delay, import_intervals); - } else if (MMP_VALID(ub)) { - /* - * zfs-0.7 compatability case - */ +} else if (MMP_VALID(ub)) { + /* + * zfs-0.7 compatibility case + */ - import_delay = MAX(import_delay, (multihost_interval + - ub->ub_mmp_delay) * import_intervals); + import_delay = MAX(import_delay, (multihost_interval + + ub->ub_mmp_delay) * import_intervals); - zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu " - "import_intervals=%u leaves=%u", import_delay, - ub->ub_mmp_delay, import_intervals, - vdev_count_leaves(spa)); - } else { - /* Using local tunings is the only reasonable option */ - zfs_dbgmsg("pool last imported on non-MMP aware " - "host using import_delay=%llu multihost_interval=%llu " - "import_intervals=%u", import_delay, multihost_interval, - import_intervals); - } + zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu " + "import_intervals=%u leaves=%u", import_delay, + ub->ub_mmp_delay, import_intervals, + vdev_count_leaves(spa)); +} else { + /* Using local tunings is the only reasonable option */ + zfs_dbgmsg("pool last imported on non-MMP aware " + "host using import_delay=%llu multihost_interval=%llu " + "import_intervals=%u", import_delay, multihost_interval, + import_intervals); +} - return (import_delay); +return (import_delay); } /* - * Perform the import activity check. If the user canceled the import or - * we detected activity then fail. - */ +* Perform the import activity check. If the user canceled the import or +* we detected activity then fail. +*/ static int spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) { - uint64_t txg = ub->ub_txg; - uint64_t timestamp = ub->ub_timestamp; - uint64_t mmp_config = ub->ub_mmp_config; - uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0; - uint64_t import_delay; - hrtime_t import_expire; - nvlist_t *mmp_label = NULL; - vdev_t *rvd = spa->spa_root_vdev; - kcondvar_t cv; - kmutex_t mtx; - int error = 0; +uint64_t txg = ub->ub_txg; +uint64_t timestamp = ub->ub_timestamp; +uint64_t mmp_config = ub->ub_mmp_config; +uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0; +uint64_t import_delay; +hrtime_t import_expire; +nvlist_t *mmp_label = NULL; +vdev_t *rvd = spa->spa_root_vdev; +kcondvar_t cv; +kmutex_t mtx; +int error = 0; + +cv_init(&cv, NULL, CV_DEFAULT, NULL); +mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL); +mutex_enter(&mtx); - cv_init(&cv, NULL, CV_DEFAULT, NULL); - mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL); - mutex_enter(&mtx); +/* + * If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed + * during the earlier tryimport. If the txg recorded there is 0 then + * the pool is known to be active on another host. + * + * Otherwise, the pool might be in use on another host. Check for + * changes in the uberblocks on disk if necessary. + */ +if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { + nvlist_t *nvinfo = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_LOAD_INFO); - /* - * If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed - * during the earlier tryimport. If the txg recorded there is 0 then - * the pool is known to be active on another host. - * - * Otherwise, the pool might be in use on another host. Check for - * changes in the uberblocks on disk if necessary. 
- */ - if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { - nvlist_t *nvinfo = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_LOAD_INFO); - - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) && - fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) { - vdev_uberblock_load(rvd, ub, &mmp_label); - error = SET_ERROR(EREMOTEIO); - goto out; - } + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) && + fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) { + vdev_uberblock_load(rvd, ub, &mmp_label); + error = SET_ERROR(EREMOTEIO); + goto out; } +} - import_delay = spa_activity_check_duration(spa, ub); +import_delay = spa_activity_check_duration(spa, ub); - /* Add a small random factor in case of simultaneous imports (0-25%) */ - import_delay += import_delay * spa_get_random(250) / 1000; +/* Add a small random factor in case of simultaneous imports (0-25%) */ +import_delay += import_delay * spa_get_random(250) / 1000; - import_expire = gethrtime() + import_delay; +import_expire = gethrtime() + import_delay; - while (gethrtime() < import_expire) { - (void) spa_import_progress_set_mmp_check(spa_guid(spa), - NSEC2SEC(import_expire - gethrtime())); +while (gethrtime() < import_expire) { + (void) spa_import_progress_set_mmp_check(spa_guid(spa), + NSEC2SEC(import_expire - gethrtime())); - vdev_uberblock_load(rvd, ub, &mmp_label); + vdev_uberblock_load(rvd, ub, &mmp_label); - if (txg != ub->ub_txg || timestamp != ub->ub_timestamp || - mmp_seq != (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) { - zfs_dbgmsg("multihost activity detected " - "txg %llu ub_txg %llu " - "timestamp %llu ub_timestamp %llu " - "mmp_config %#llx ub_mmp_config %#llx", - txg, ub->ub_txg, timestamp, ub->ub_timestamp, - mmp_config, ub->ub_mmp_config); + if (txg != ub->ub_txg || timestamp != ub->ub_timestamp || + mmp_seq != (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) { + zfs_dbgmsg("multihost activity detected " + "txg %llu ub_txg %llu " + "timestamp %llu ub_timestamp %llu " + "mmp_config %#llx ub_mmp_config %#llx", + txg, ub->ub_txg, timestamp, ub->ub_timestamp, + mmp_config, ub->ub_mmp_config); - error = SET_ERROR(EREMOTEIO); - break; - } + error = SET_ERROR(EREMOTEIO); + break; + } - if (mmp_label) { - nvlist_free(mmp_label); - mmp_label = NULL; - } + if (mmp_label) { + nvlist_free(mmp_label); + mmp_label = NULL; + } - error = cv_timedwait_sig(&cv, &mtx, ddi_get_lbolt() + hz); - if (error != -1) { - error = SET_ERROR(EINTR); - break; - } - error = 0; + error = cv_timedwait_sig(&cv, &mtx, ddi_get_lbolt() + hz); + if (error != -1) { + error = SET_ERROR(EINTR); + break; } + error = 0; +} out: - mutex_exit(&mtx); - mutex_destroy(&mtx); - cv_destroy(&cv); +mutex_exit(&mtx); +mutex_destroy(&mtx); +cv_destroy(&cv); - /* - * If the pool is determined to be active store the status in the - * spa->spa_load_info nvlist. If the remote hostname or hostid are - * available from configuration read from disk store them as well. - * This allows 'zpool import' to generate a more useful message. 
- * - * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory) - * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool - * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool - */ - if (error == EREMOTEIO) { - char *hostname = ""; - uint64_t hostid = 0; - - if (mmp_label) { - if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) { - hostname = fnvlist_lookup_string(mmp_label, - ZPOOL_CONFIG_HOSTNAME); - fnvlist_add_string(spa->spa_load_info, - ZPOOL_CONFIG_MMP_HOSTNAME, hostname); - } +/* + * If the pool is determined to be active store the status in the + * spa->spa_load_info nvlist. If the remote hostname or hostid are + * available from configuration read from disk store them as well. + * This allows 'zpool import' to generate a more useful message. + * + * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory) + * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool + * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool + */ +if (error == EREMOTEIO) { + char *hostname = ""; + uint64_t hostid = 0; - if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) { - hostid = fnvlist_lookup_uint64(mmp_label, - ZPOOL_CONFIG_HOSTID); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_HOSTID, hostid); - } + if (mmp_label) { + if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) { + hostname = fnvlist_lookup_string(mmp_label, + ZPOOL_CONFIG_HOSTNAME); + fnvlist_add_string(spa->spa_load_info, + ZPOOL_CONFIG_MMP_HOSTNAME, hostname); } - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_TXG, 0); - - error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO); + if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) { + hostid = fnvlist_lookup_uint64(mmp_label, + ZPOOL_CONFIG_HOSTID); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_HOSTID, hostid); + } } - if (mmp_label) - nvlist_free(mmp_label); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_TXG, 0); - return (error); + error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO); +} + +if (mmp_label) + nvlist_free(mmp_label); + +return (error); } static int spa_verify_host(spa_t *spa, nvlist_t *mos_config) { - uint64_t hostid; - char *hostname; - uint64_t myhostid = 0; - - if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config, - ZPOOL_CONFIG_HOSTID, &hostid) == 0) { - hostname = fnvlist_lookup_string(mos_config, - ZPOOL_CONFIG_HOSTNAME); - - myhostid = zone_get_hostid(NULL); - - if (hostid != 0 && myhostid != 0 && hostid != myhostid) { - cmn_err(CE_WARN, "pool '%s' could not be " - "loaded as it was last accessed by " - "another system (host: %s hostid: 0x%llx). " - "See: http://illumos.org/msg/ZFS-8000-EY", - spa_name(spa), hostname, (u_longlong_t)hostid); - spa_load_failed(spa, "hostid verification failed: pool " - "last accessed by host: %s (hostid: 0x%llx)", - hostname, (u_longlong_t)hostid); - return (SET_ERROR(EBADF)); - } +uint64_t hostid; +char *hostname; +uint64_t myhostid = 0; + +if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config, + ZPOOL_CONFIG_HOSTID, &hostid) == 0) { + hostname = fnvlist_lookup_string(mos_config, + ZPOOL_CONFIG_HOSTNAME); + + myhostid = zone_get_hostid(NULL); + + if (hostid != 0 && myhostid != 0 && hostid != myhostid) { + cmn_err(CE_WARN, "pool '%s' could not be " + "loaded as it was last accessed by " + "another system (host: %s hostid: 0x%llx). 
" + "See: http://illumos.org/msg/ZFS-8000-EY", + spa_name(spa), hostname, (u_longlong_t)hostid); + spa_load_failed(spa, "hostid verification failed: pool " + "last accessed by host: %s (hostid: 0x%llx)", + hostname, (u_longlong_t)hostid); + return (SET_ERROR(EBADF)); } +} - return (0); +return (0); } static int spa_ld_parse_config(spa_t *spa, spa_import_type_t type) { - int error = 0; - nvlist_t *nvtree, *nvl, *config = spa->spa_config; - int parse; - vdev_t *rvd; - uint64_t pool_guid; - char *comment; +int error = 0; +nvlist_t *nvtree, *nvl, *config = spa->spa_config; +int parse; +vdev_t *rvd; +uint64_t pool_guid; +char *comment; - /* - * Versioning wasn't explicitly added to the label until later, so if - * it's not present treat it as the initial version. - */ - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &spa->spa_ubsync.ub_version) != 0) - spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; +/* + * Versioning wasn't explicitly added to the label until later, so if + * it's not present treat it as the initial version. + */ +if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &spa->spa_ubsync.ub_version) != 0) + spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { - spa_load_failed(spa, "invalid config provided: '%s' missing", - ZPOOL_CONFIG_POOL_GUID); - return (SET_ERROR(EINVAL)); - } +if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { + spa_load_failed(spa, "invalid config provided: '%s' missing", + ZPOOL_CONFIG_POOL_GUID); + return (SET_ERROR(EINVAL)); +} - /* - * If we are doing an import, ensure that the pool is not already - * imported by checking if its pool guid already exists in the - * spa namespace. - * - * The only case that we allow an already imported pool to be - * imported again, is when the pool is checkpointed and we want to - * look at its checkpointed state from userland tools like zdb. - */ +/* + * If we are doing an import, ensure that the pool is not already + * imported by checking if its pool guid already exists in the + * spa namespace. + * + * The only case that we allow an already imported pool to be + * imported again, is when the pool is checkpointed and we want to + * look at its checkpointed state from userland tools like zdb. 
+ */ #ifdef _KERNEL - if ((spa->spa_load_state == SPA_LOAD_IMPORT || - spa->spa_load_state == SPA_LOAD_TRYIMPORT) && - spa_guid_exists(pool_guid, 0)) { +if ((spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0)) { #else - if ((spa->spa_load_state == SPA_LOAD_IMPORT || - spa->spa_load_state == SPA_LOAD_TRYIMPORT) && - spa_guid_exists(pool_guid, 0) && - !spa_importing_readonly_checkpoint(spa)) { +if ((spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0) && + !spa_importing_readonly_checkpoint(spa)) { #endif - spa_load_failed(spa, "a pool with guid %llu is already open", - (u_longlong_t)pool_guid); - return (SET_ERROR(EEXIST)); - } - - spa->spa_config_guid = pool_guid; + spa_load_failed(spa, "a pool with guid %llu is already open", + (u_longlong_t)pool_guid); + return (SET_ERROR(EEXIST)); +} - nvlist_free(spa->spa_load_info); - spa->spa_load_info = fnvlist_alloc(); +spa->spa_config_guid = pool_guid; - ASSERT(spa->spa_comment == NULL); - if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) - spa->spa_comment = spa_strdup(comment); +nvlist_free(spa->spa_load_info); +spa->spa_load_info = fnvlist_alloc(); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, - &spa->spa_config_txg); +ASSERT(spa->spa_comment == NULL); +if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + spa->spa_comment = spa_strdup(comment); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) - spa->spa_config_splitting = fnvlist_dup(nvl); +(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, + &spa->spa_config_txg); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtree)) { - spa_load_failed(spa, "invalid config provided: '%s' missing", - ZPOOL_CONFIG_VDEV_TREE); - return (SET_ERROR(EINVAL)); - } +if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) + spa->spa_config_splitting = fnvlist_dup(nvl); - /* - * Create "The Godfather" zio to hold all async IOs - */ - spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), - KM_SLEEP); - for (int i = 0; i < max_ncpus; i++) { - spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); - } +if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtree)) { + spa_load_failed(spa, "invalid config provided: '%s' missing", + ZPOOL_CONFIG_VDEV_TREE); + return (SET_ERROR(EINVAL)); +} - /* - * Parse the configuration into a vdev tree. We explicitly set the - * value that will be returned by spa_version() since parsing the - * configuration requires knowing the version number. - */ - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - parse = (type == SPA_IMPORT_EXISTING ? - VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); - error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); - spa_config_exit(spa, SCL_ALL, FTAG); +/* + * Create "The Godfather" zio to hold all async IOs + */ +spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); +for (int i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); +} - if (error != 0) { - spa_load_failed(spa, "unable to parse config [error=%d]", - error); - return (error); - } +/* + * Parse the configuration into a vdev tree. 
We explicitly set the + * value that will be returned by spa_version() since parsing the + * configuration requires knowing the version number. + */ +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +parse = (type == SPA_IMPORT_EXISTING ? + VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); +error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); +spa_config_exit(spa, SCL_ALL, FTAG); + +if (error != 0) { + spa_load_failed(spa, "unable to parse config [error=%d]", + error); + return (error); +} - ASSERT(spa->spa_root_vdev == rvd); - ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); - ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); +ASSERT(spa->spa_root_vdev == rvd); +ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); +ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); - if (type != SPA_IMPORT_ASSEMBLE) { - ASSERT(spa_guid(spa) == pool_guid); - } +if (type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_guid(spa) == pool_guid); +} - return (0); +return (0); } /* - * Recursively open all vdevs in the vdev tree. This function is called twice: - * first with the untrusted config, then with the trusted config. - */ +* Recursively open all vdevs in the vdev tree. This function is called twice: +* first with the untrusted config, then with the trusted config. +*/ static int spa_ld_open_vdevs(spa_t *spa) { - int error = 0; +int error = 0; - /* - * spa_missing_tvds_allowed defines how many top-level vdevs can be - * missing/unopenable for the root vdev to be still considered openable. - */ - if (spa->spa_trust_config) { - spa->spa_missing_tvds_allowed = zfs_max_missing_tvds; - } else if (spa->spa_config_source == SPA_CONFIG_SRC_CACHEFILE) { - spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_cachefile; - } else if (spa->spa_config_source == SPA_CONFIG_SRC_SCAN) { - spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_scan; +/* + * spa_missing_tvds_allowed defines how many top-level vdevs can be + * missing/unopenable for the root vdev to be still considered openable. + */ +if (spa->spa_trust_config) { + spa->spa_missing_tvds_allowed = zfs_max_missing_tvds; +} else if (spa->spa_config_source == SPA_CONFIG_SRC_CACHEFILE) { + spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_cachefile; +} else if (spa->spa_config_source == SPA_CONFIG_SRC_SCAN) { + spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_scan; +} else { + spa->spa_missing_tvds_allowed = 0; +} + +spa->spa_missing_tvds_allowed = + MAX(zfs_max_missing_tvds, spa->spa_missing_tvds_allowed); + +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +error = vdev_open(spa->spa_root_vdev); +spa_config_exit(spa, SCL_ALL, FTAG); + +if (spa->spa_missing_tvds != 0) { + spa_load_note(spa, "vdev tree has %lld missing top-level " + "vdevs.", (u_longlong_t)spa->spa_missing_tvds); + if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) { + /* + * Although theoretically we could allow users to open + * incomplete pools in RW mode, we'd need to add a lot + * of extra logic (e.g. adjust pool space to account + * for missing vdevs). + * This limitation also prevents users from accidentally + * opening the pool in RW mode during data recovery and + * damaging it further. 
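+ *
+ * A typical recovery import would look like this from userland
+ * (tunable value and pool name are examples only):
+ *
+ *	echo 1 > /sys/module/zfs/parameters/zfs_max_missing_tvds
+ *	zpool import -o readonly=on tank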
+ */ + spa_load_note(spa, "pools with missing top-level " + "vdevs can only be opened in read-only mode."); + error = SET_ERROR(ENXIO); } else { - spa->spa_missing_tvds_allowed = 0; - } - - spa->spa_missing_tvds_allowed = - MAX(zfs_max_missing_tvds, spa->spa_missing_tvds_allowed); - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = vdev_open(spa->spa_root_vdev); - spa_config_exit(spa, SCL_ALL, FTAG); - - if (spa->spa_missing_tvds != 0) { - spa_load_note(spa, "vdev tree has %lld missing top-level " - "vdevs.", (u_longlong_t)spa->spa_missing_tvds); - if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) { - /* - * Although theoretically we could allow users to open - * incomplete pools in RW mode, we'd need to add a lot - * of extra logic (e.g. adjust pool space to account - * for missing vdevs). - * This limitation also prevents users from accidentally - * opening the pool in RW mode during data recovery and - * damaging it further. - */ - spa_load_note(spa, "pools with missing top-level " - "vdevs can only be opened in read-only mode."); - error = SET_ERROR(ENXIO); - } else { - spa_load_note(spa, "current settings allow for maximum " - "%lld missing top-level vdevs at this stage.", - (u_longlong_t)spa->spa_missing_tvds_allowed); - } - } - if (error != 0) { - spa_load_failed(spa, "unable to open vdev tree [error=%d]", - error); + spa_load_note(spa, "current settings allow for maximum " + "%lld missing top-level vdevs at this stage.", + (u_longlong_t)spa->spa_missing_tvds_allowed); } - if (spa->spa_missing_tvds != 0 || error != 0) - vdev_dbgmsg_print_tree(spa->spa_root_vdev, 2); +} +if (error != 0) { + spa_load_failed(spa, "unable to open vdev tree [error=%d]", + error); +} +if (spa->spa_missing_tvds != 0 || error != 0) + vdev_dbgmsg_print_tree(spa->spa_root_vdev, 2); - return (error); +return (error); } /* - * We need to validate the vdev labels against the configuration that - * we have in hand. This function is called twice: first with an untrusted - * config, then with a trusted config. The validation is more strict when the - * config is trusted. - */ +* We need to validate the vdev labels against the configuration that +* we have in hand. This function is called twice: first with an untrusted +* config, then with a trusted config. The validation is more strict when the +* config is trusted. 
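+*
+* Sketch of the sequence during a typical load: spa_ld_open_vdevs()
+* and this function first run against the untrusted config, then
+* spa_ld_trusted_config() reads the config back from the MOS and
+* calls both again for the strict pass.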
+*/ static int spa_ld_validate_vdevs(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = vdev_validate(rvd); - spa_config_exit(spa, SCL_ALL, FTAG); +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +error = vdev_validate(rvd); +spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0) { - spa_load_failed(spa, "vdev_validate failed [error=%d]", error); - return (error); - } +if (error != 0) { + spa_load_failed(spa, "vdev_validate failed [error=%d]", error); + return (error); +} - if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { - spa_load_failed(spa, "cannot open vdev tree after invalidating " - "some vdevs"); - vdev_dbgmsg_print_tree(rvd, 2); - return (SET_ERROR(ENXIO)); - } +if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { + spa_load_failed(spa, "cannot open vdev tree after invalidating " + "some vdevs"); + vdev_dbgmsg_print_tree(rvd, 2); + return (SET_ERROR(ENXIO)); +} - return (0); +return (0); } static void spa_ld_select_uberblock_done(spa_t *spa, uberblock_t *ub) { - spa->spa_state = POOL_STATE_ACTIVE; - spa->spa_ubsync = spa->spa_uberblock; - spa->spa_verify_min_txg = spa->spa_extreme_rewind ? - TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; - spa->spa_first_txg = spa->spa_last_ubsync_txg ? - spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; - spa->spa_claim_max_txg = spa->spa_first_txg; - spa->spa_prev_software_version = ub->ub_software_version; +spa->spa_state = POOL_STATE_ACTIVE; +spa->spa_ubsync = spa->spa_uberblock; +spa->spa_verify_min_txg = spa->spa_extreme_rewind ? + TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; +spa->spa_first_txg = spa->spa_last_ubsync_txg ? + spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; +spa->spa_claim_max_txg = spa->spa_first_txg; +spa->spa_prev_software_version = ub->ub_software_version; } static int spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) { - vdev_t *rvd = spa->spa_root_vdev; - nvlist_t *label; - uberblock_t *ub = &spa->spa_uberblock; - boolean_t activity_check = B_FALSE; - - /* - * If we are opening the checkpointed state of the pool by - * rewinding to it, at this point we will have written the - * checkpointed uberblock to the vdev labels, so searching - * the labels will find the right uberblock. However, if - * we are opening the checkpointed state read-only, we have - * not modified the labels. Therefore, we must ignore the - * labels and continue using the spa_uberblock that was set - * by spa_ld_checkpoint_rewind. - * - * Note that it would be fine to ignore the labels when - * rewinding (opening writeable) as well. However, if we - * crash just after writing the labels, we will end up - * searching the labels. Doing so in the common case means - * that this code path gets exercised normally, rather than - * just in the edge case. - */ - if (ub->ub_checkpoint_txg != 0 && - spa_importing_readonly_checkpoint(spa)) { - spa_ld_select_uberblock_done(spa, ub); - return (0); - } - - /* - * Find the best uberblock. - */ - vdev_uberblock_load(rvd, ub, &label); +vdev_t *rvd = spa->spa_root_vdev; +nvlist_t *label; +uberblock_t *ub = &spa->spa_uberblock; +boolean_t activity_check = B_FALSE; - /* - * If we weren't able to find a single valid uberblock, return failure. 
- */ - if (ub->ub_txg == 0) { - nvlist_free(label); - spa_load_failed(spa, "no valid uberblock found"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); - } +/* + * If we are opening the checkpointed state of the pool by + * rewinding to it, at this point we will have written the + * checkpointed uberblock to the vdev labels, so searching + * the labels will find the right uberblock. However, if + * we are opening the checkpointed state read-only, we have + * not modified the labels. Therefore, we must ignore the + * labels and continue using the spa_uberblock that was set + * by spa_ld_checkpoint_rewind. + * + * Note that it would be fine to ignore the labels when + * rewinding (opening writeable) as well. However, if we + * crash just after writing the labels, we will end up + * searching the labels. Doing so in the common case means + * that this code path gets exercised normally, rather than + * just in the edge case. + */ +if (ub->ub_checkpoint_txg != 0 && + spa_importing_readonly_checkpoint(spa)) { + spa_ld_select_uberblock_done(spa, ub); + return (0); +} - if (spa->spa_load_max_txg != UINT64_MAX) { - (void) spa_import_progress_set_max_txg(spa_guid(spa), - (u_longlong_t)spa->spa_load_max_txg); - } - spa_load_note(spa, "using uberblock with txg=%llu", - (u_longlong_t)ub->ub_txg); +/* + * Find the best uberblock. + */ +vdev_uberblock_load(rvd, ub, &label); +/* + * If we weren't able to find a single valid uberblock, return failure. + */ +if (ub->ub_txg == 0) { + nvlist_free(label); + spa_load_failed(spa, "no valid uberblock found"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); +} - /* - * For pools which have the multihost property on determine if the - * pool is truly inactive and can be safely imported. Prevent - * hosts which don't have a hostid set from importing the pool. - */ - activity_check = spa_activity_check_required(spa, ub, label, - spa->spa_config); - if (activity_check) { - if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && - spa_get_hostid() == 0) { - nvlist_free(label); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); - return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); - } +if (spa->spa_load_max_txg != UINT64_MAX) { + (void) spa_import_progress_set_max_txg(spa_guid(spa), + (u_longlong_t)spa->spa_load_max_txg); +} +spa_load_note(spa, "using uberblock with txg=%llu", + (u_longlong_t)ub->ub_txg); - int error = spa_activity_check(spa, ub, spa->spa_config); - if (error) { - nvlist_free(label); - return (error); - } +/* + * For pools which have the multihost property on determine if the + * pool is truly inactive and can be safely imported. Prevent + * hosts which don't have a hostid set from importing the pool. + */ +activity_check = spa_activity_check_required(spa, ub, label, + spa->spa_config); +if (activity_check) { + if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && + spa_get_hostid(spa) == 0) { + nvlist_free(label); fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE); - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_TXG, ub->ub_txg); - fnvlist_add_uint16(spa->spa_load_info, - ZPOOL_CONFIG_MMP_SEQ, - (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)); + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); + return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); } - /* - * If the pool has an unsupported version we can't open it. 
- */ - if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { + int error = spa_activity_check(spa, ub, spa->spa_config); + if (error) { nvlist_free(label); - spa_load_failed(spa, "version %llu is not supported", - (u_longlong_t)ub->ub_version); - return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); + return (error); } - if (ub->ub_version >= SPA_VERSION_FEATURES) { - nvlist_t *features; + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_TXG, ub->ub_txg); + fnvlist_add_uint16(spa->spa_load_info, + ZPOOL_CONFIG_MMP_SEQ, + (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)); +} - /* - * If we weren't able to find what's necessary for reading the - * MOS in the label, return failure. - */ - if (label == NULL) { - spa_load_failed(spa, "label config unavailable"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - ENXIO)); - } +/* + * If the pool has an unsupported version we can't open it. + */ +if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { + nvlist_free(label); + spa_load_failed(spa, "version %llu is not supported", + (u_longlong_t)ub->ub_version); + return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); +} - if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, - &features) != 0) { - nvlist_free(label); - spa_load_failed(spa, "invalid label: '%s' missing", - ZPOOL_CONFIG_FEATURES_FOR_READ); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - ENXIO)); - } +if (ub->ub_version >= SPA_VERSION_FEATURES) { + nvlist_t *features; - /* - * Update our in-core representation with the definitive values - * from the label. - */ - nvlist_free(spa->spa_label_features); - VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); + /* + * If we weren't able to find what's necessary for reading the + * MOS in the label, return failure. + */ + if (label == NULL) { + spa_load_failed(spa, "label config unavailable"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + ENXIO)); } - nvlist_free(label); + if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, + &features) != 0) { + nvlist_free(label); + spa_load_failed(spa, "invalid label: '%s' missing", + ZPOOL_CONFIG_FEATURES_FOR_READ); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + ENXIO)); + } /* - * Look through entries in the label nvlist's features_for_read. If - * there is a feature listed there which we don't understand then we - * cannot open a pool. + * Update our in-core representation with the definitive values + * from the label. */ - if (ub->ub_version >= SPA_VERSION_FEATURES) { - nvlist_t *unsup_feat; - - VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == - 0); - - for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, - NULL); nvp != NULL; - nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { - if (!zfeature_is_supported(nvpair_name(nvp))) { - VERIFY(nvlist_add_string(unsup_feat, - nvpair_name(nvp), "") == 0); - } - } + nvlist_free(spa->spa_label_features); + VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); +} + +nvlist_free(label); - if (!nvlist_empty(unsup_feat)) { - VERIFY(nvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); - nvlist_free(unsup_feat); - spa_load_failed(spa, "some features are unsupported"); - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); +/* + * Look through entries in the label nvlist's features_for_read. If + * there is a feature listed there which we don't understand then we + * cannot open a pool. 
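+ *
+ * A label typically carries just the feature names, roughly
+ * (example entries):
+ *
+ *	features_for_read:
+ *		com.delphix:hole_birth
+ *		com.delphix:embedded_data
+ *
+ * Any name zfeature_is_supported() does not recognize is collected
+ * into unsup_feat below.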
+ */ +if (ub->ub_version >= SPA_VERSION_FEATURES) { + nvlist_t *unsup_feat; + + VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == + 0); + + for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, + NULL); nvp != NULL; + nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { + if (!zfeature_is_supported(nvpair_name(nvp))) { + VERIFY(nvlist_add_string(unsup_feat, + nvpair_name(nvp), "") == 0); } + } + if (!nvlist_empty(unsup_feat)) { + VERIFY(nvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); nvlist_free(unsup_feat); + spa_load_failed(spa, "some features are unsupported"); + return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, + ENOTSUP)); } - if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_try_repair(spa, spa->spa_config); - spa_config_exit(spa, SCL_ALL, FTAG); - nvlist_free(spa->spa_config_splitting); - spa->spa_config_splitting = NULL; - } + nvlist_free(unsup_feat); +} - /* - * Initialize internal SPA structures. - */ - spa_ld_select_uberblock_done(spa, ub); +if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_try_repair(spa, spa->spa_config); + spa_config_exit(spa, SCL_ALL, FTAG); + nvlist_free(spa->spa_config_splitting); + spa->spa_config_splitting = NULL; +} - return (0); +/* + * Initialize internal SPA structures. + */ +spa_ld_select_uberblock_done(spa, ub); + +return (0); } static int spa_ld_open_rootbp(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); - if (error != 0) { - spa_load_failed(spa, "unable to open rootbp in dsl_pool_init " - "[error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; +error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); +if (error != 0) { + spa_load_failed(spa, "unable to open rootbp in dsl_pool_init " + "[error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} +spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; - return (0); +return (0); } static int spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, - boolean_t reloading) +boolean_t reloading) { - vdev_t *mrvd, *rvd = spa->spa_root_vdev; - nvlist_t *nv, *mos_config, *policy; - int error = 0, copy_error; - uint64_t healthy_tvds, healthy_tvds_mos; - uint64_t mos_config_txg; +vdev_t *mrvd, *rvd = spa->spa_root_vdev; +nvlist_t *nv, *mos_config, *policy; +int error = 0, copy_error; +uint64_t healthy_tvds, healthy_tvds_mos; +uint64_t mos_config_txg; - if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object, B_TRUE) - != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object, B_TRUE) + != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - /* - * If we're assembling a pool from a split, the config provided is - * already trusted so there is nothing to do. - */ - if (type == SPA_IMPORT_ASSEMBLE) - return (0); +/* + * If we're assembling a pool from a split, the config provided is + * already trusted so there is nothing to do. 
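+ *
+ * (SPA_IMPORT_ASSEMBLE is the 'zpool split' path, e.g. a
+ * hypothetical 'zpool split tank newtank': the config was generated
+ * on this host moments ago rather than read from a cachefile or a
+ * device scan.)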
+ */ +if (type == SPA_IMPORT_ASSEMBLE) + return (0); - healthy_tvds = spa_healthy_core_tvds(spa); +healthy_tvds = spa_healthy_core_tvds(spa); - if (load_nvlist(spa, spa->spa_config_object, &mos_config) - != 0) { - spa_load_failed(spa, "unable to retrieve MOS config"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +if (load_nvlist(spa, spa->spa_config_object, &mos_config) + != 0) { + spa_load_failed(spa, "unable to retrieve MOS config"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - /* - * If we are doing an open, pool owner wasn't verified yet, thus do - * the verification here. - */ - if (spa->spa_load_state == SPA_LOAD_OPEN) { - error = spa_verify_host(spa, mos_config); - if (error != 0) { - nvlist_free(mos_config); - return (error); - } +/* + * If we are doing an open, pool owner wasn't verified yet, thus do + * the verification here. + */ +if (spa->spa_load_state == SPA_LOAD_OPEN) { + error = spa_verify_host(spa, mos_config); + if (error != 0) { + nvlist_free(mos_config); + return (error); } +} - nv = fnvlist_lookup_nvlist(mos_config, ZPOOL_CONFIG_VDEV_TREE); +nv = fnvlist_lookup_nvlist(mos_config, ZPOOL_CONFIG_VDEV_TREE); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - /* - * Build a new vdev tree from the trusted config - */ - VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); +/* + * Build a new vdev tree from the trusted config + */ +VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); - /* - * Vdev paths in the MOS may be obsolete. If the untrusted config was - * obtained by scanning /dev/dsk, then it will have the right vdev - * paths. We update the trusted MOS config with this information. - * We first try to copy the paths with vdev_copy_path_strict, which - * succeeds only when both configs have exactly the same vdev tree. - * If that fails, we fall back to a more flexible method that has a - * best effort policy. - */ - copy_error = vdev_copy_path_strict(rvd, mrvd); - if (copy_error != 0 || spa_load_print_vdev_tree) { - spa_load_note(spa, "provided vdev tree:"); - vdev_dbgmsg_print_tree(rvd, 2); - spa_load_note(spa, "MOS vdev tree:"); - vdev_dbgmsg_print_tree(mrvd, 2); - } - if (copy_error != 0) { - spa_load_note(spa, "vdev_copy_path_strict failed, falling " - "back to vdev_copy_path_relaxed"); - vdev_copy_path_relaxed(rvd, mrvd); - } +/* + * Vdev paths in the MOS may be obsolete. If the untrusted config was + * obtained by scanning /dev/dsk, then it will have the right vdev + * paths. We update the trusted MOS config with this information. + * We first try to copy the paths with vdev_copy_path_strict, which + * succeeds only when both configs have exactly the same vdev tree. + * If that fails, we fall back to a more flexible method that has a + * best effort policy. 
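+ *
+ * Example of the refresh (device paths are hypothetical): the MOS
+ * copy of a leaf may still say path='/dev/sdb1' while the scanned
+ * config for the same guid says
+ * path='/dev/disk/by-id/ata-EXAMPLE-part1'; the copy below carries
+ * the fresher path into the trusted tree.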
+ */ +copy_error = vdev_copy_path_strict(rvd, mrvd); +if (copy_error != 0 || spa_load_print_vdev_tree) { + spa_load_note(spa, "provided vdev tree:"); + vdev_dbgmsg_print_tree(rvd, 2); + spa_load_note(spa, "MOS vdev tree:"); + vdev_dbgmsg_print_tree(mrvd, 2); +} +if (copy_error != 0) { + spa_load_note(spa, "vdev_copy_path_strict failed, falling " + "back to vdev_copy_path_relaxed"); + vdev_copy_path_relaxed(rvd, mrvd); +} - vdev_close(rvd); - vdev_free(rvd); - spa->spa_root_vdev = mrvd; - rvd = mrvd; - spa_config_exit(spa, SCL_ALL, FTAG); +vdev_close(rvd); +vdev_free(rvd); +spa->spa_root_vdev = mrvd; +rvd = mrvd; +spa_config_exit(spa, SCL_ALL, FTAG); - /* - * We will use spa_config if we decide to reload the spa or if spa_load - * fails and we rewind. We must thus regenerate the config using the - * MOS information with the updated paths. ZPOOL_LOAD_POLICY is used to - * pass settings on how to load the pool and is not stored in the MOS. - * We copy it over to our new, trusted config. - */ - mos_config_txg = fnvlist_lookup_uint64(mos_config, - ZPOOL_CONFIG_POOL_TXG); - nvlist_free(mos_config); - mos_config = spa_config_generate(spa, NULL, mos_config_txg, B_FALSE); - if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_LOAD_POLICY, - &policy) == 0) - fnvlist_add_nvlist(mos_config, ZPOOL_LOAD_POLICY, policy); - spa_config_set(spa, mos_config); - spa->spa_config_source = SPA_CONFIG_SRC_MOS; +/* + * We will use spa_config if we decide to reload the spa or if spa_load + * fails and we rewind. We must thus regenerate the config using the + * MOS information with the updated paths. ZPOOL_LOAD_POLICY is used to + * pass settings on how to load the pool and is not stored in the MOS. + * We copy it over to our new, trusted config. + */ +mos_config_txg = fnvlist_lookup_uint64(mos_config, + ZPOOL_CONFIG_POOL_TXG); +nvlist_free(mos_config); +mos_config = spa_config_generate(spa, NULL, mos_config_txg, B_FALSE); +if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_LOAD_POLICY, + &policy) == 0) + fnvlist_add_nvlist(mos_config, ZPOOL_LOAD_POLICY, policy); +spa_config_set(spa, mos_config); +spa->spa_config_source = SPA_CONFIG_SRC_MOS; - /* - * Now that we got the config from the MOS, we should be more strict - * in checking blkptrs and can make assumptions about the consistency - * of the vdev tree. spa_trust_config must be set to true before opening - * vdevs in order for them to be writeable. - */ - spa->spa_trust_config = B_TRUE; +/* + * Now that we got the config from the MOS, we should be more strict + * in checking blkptrs and can make assumptions about the consistency + * of the vdev tree. spa_trust_config must be set to true before opening + * vdevs in order for them to be writeable. 
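+ *
+ * (One sketch of the stricter checking: blkptr verification only
+ * checks individual DVAs against the vdev tree once
+ * spa_trust_config is set, since the untrusted tree may be missing
+ * some top-level vdevs.)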
+ */ +spa->spa_trust_config = B_TRUE; - /* - * Open and validate the new vdev tree - */ - error = spa_ld_open_vdevs(spa); - if (error != 0) - return (error); +/* + * Open and validate the new vdev tree + */ +error = spa_ld_open_vdevs(spa); +if (error != 0) + return (error); - error = spa_ld_validate_vdevs(spa); - if (error != 0) - return (error); +error = spa_ld_validate_vdevs(spa); +if (error != 0) + return (error); - if (copy_error != 0 || spa_load_print_vdev_tree) { - spa_load_note(spa, "final vdev tree:"); - vdev_dbgmsg_print_tree(rvd, 2); - } +if (copy_error != 0 || spa_load_print_vdev_tree) { + spa_load_note(spa, "final vdev tree:"); + vdev_dbgmsg_print_tree(rvd, 2); +} - if (spa->spa_load_state != SPA_LOAD_TRYIMPORT && - !spa->spa_extreme_rewind && zfs_max_missing_tvds == 0) { - /* - * Sanity check to make sure that we are indeed loading the - * latest uberblock. If we missed SPA_SYNC_MIN_VDEVS tvds - * in the config provided and they happened to be the only ones - * to have the latest uberblock, we could involuntarily perform - * an extreme rewind. - */ - healthy_tvds_mos = spa_healthy_core_tvds(spa); - if (healthy_tvds_mos - healthy_tvds >= - SPA_SYNC_MIN_VDEVS) { - spa_load_note(spa, "config provided misses too many " - "top-level vdevs compared to MOS (%lld vs %lld). ", - (u_longlong_t)healthy_tvds, - (u_longlong_t)healthy_tvds_mos); - spa_load_note(spa, "vdev tree:"); - vdev_dbgmsg_print_tree(rvd, 2); - if (reloading) { - spa_load_failed(spa, "config was already " - "provided from MOS. Aborting."); - return (spa_vdev_err(rvd, - VDEV_AUX_CORRUPT_DATA, EIO)); - } - spa_load_note(spa, "spa must be reloaded using MOS " - "config"); - return (SET_ERROR(EAGAIN)); +if (spa->spa_load_state != SPA_LOAD_TRYIMPORT && + !spa->spa_extreme_rewind && zfs_max_missing_tvds == 0) { + /* + * Sanity check to make sure that we are indeed loading the + * latest uberblock. If we missed SPA_SYNC_MIN_VDEVS tvds + * in the config provided and they happened to be the only ones + * to have the latest uberblock, we could involuntarily perform + * an extreme rewind. + */ + healthy_tvds_mos = spa_healthy_core_tvds(spa); + if (healthy_tvds_mos - healthy_tvds >= + SPA_SYNC_MIN_VDEVS) { + spa_load_note(spa, "config provided misses too many " + "top-level vdevs compared to MOS (%lld vs %lld). ", + (u_longlong_t)healthy_tvds, + (u_longlong_t)healthy_tvds_mos); + spa_load_note(spa, "vdev tree:"); + vdev_dbgmsg_print_tree(rvd, 2); + if (reloading) { + spa_load_failed(spa, "config was already " + "provided from MOS. 
Aborting."); + return (spa_vdev_err(rvd, + VDEV_AUX_CORRUPT_DATA, EIO)); } + spa_load_note(spa, "spa must be reloaded using MOS " + "config"); + return (SET_ERROR(EAGAIN)); } +} - error = spa_check_for_missing_logs(spa); - if (error != 0) - return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); - - if (rvd->vdev_guid_sum != spa->spa_uberblock.ub_guid_sum) { - spa_load_failed(spa, "uberblock guid sum doesn't match MOS " - "guid sum (%llu != %llu)", - (u_longlong_t)spa->spa_uberblock.ub_guid_sum, - (u_longlong_t)rvd->vdev_guid_sum); - return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, - ENXIO)); - } +error = spa_check_for_missing_logs(spa); +if (error != 0) + return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); - return (0); +if (rvd->vdev_guid_sum != spa->spa_uberblock.ub_guid_sum) { + spa_load_failed(spa, "uberblock guid sum doesn't match MOS " + "guid sum (%llu != %llu)", + (u_longlong_t)spa->spa_uberblock.ub_guid_sum, + (u_longlong_t)rvd->vdev_guid_sum); + return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, + ENXIO)); +} + +return (0); } static int spa_ld_open_indirect_vdev_metadata(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * Everything that we read before spa_remove_init() must be stored - * on concreted vdevs. Therefore we do this as early as possible. - */ - error = spa_remove_init(spa); - if (error != 0) { - spa_load_failed(spa, "spa_remove_init failed [error=%d]", - error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +/* + * Everything that we read before spa_remove_init() must be stored + * on concreted vdevs. Therefore we do this as early as possible. + */ +error = spa_remove_init(spa); +if (error != 0) { + spa_load_failed(spa, "spa_remove_init failed [error=%d]", + error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - /* - * Retrieve information needed to condense indirect vdev mappings. - */ - error = spa_condense_init(spa); - if (error != 0) { - spa_load_failed(spa, "spa_condense_init failed [error=%d]", - error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); - } +/* + * Retrieve information needed to condense indirect vdev mappings. 
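+ *
+ * (Indirect vdevs are what device removal leaves behind; their
+ * mapping remaps old DVA offsets to the blocks' new locations, and
+ * condensing drops mapping entries that no remaining block
+ * references.)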
+ */ +error = spa_condense_init(spa); +if (error != 0) { + spa_load_failed(spa, "spa_condense_init failed [error=%d]", + error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); +} - return (0); +return (0); } static int spa_ld_check_features(spa_t *spa, boolean_t *missing_feat_writep) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; - - if (spa_version(spa) >= SPA_VERSION_FEATURES) { - boolean_t missing_feat_read = B_FALSE; - nvlist_t *unsup_feat, *enabled_feat; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, - &spa->spa_feat_for_read_obj, B_TRUE) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, - &spa->spa_feat_for_write_obj, B_TRUE) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +if (spa_version(spa) >= SPA_VERSION_FEATURES) { + boolean_t missing_feat_read = B_FALSE; + nvlist_t *unsup_feat, *enabled_feat; - if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, - &spa->spa_feat_desc_obj, B_TRUE) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } + if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, + &spa->spa_feat_for_read_obj, B_TRUE) != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } - enabled_feat = fnvlist_alloc(); - unsup_feat = fnvlist_alloc(); + if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, + &spa->spa_feat_for_write_obj, B_TRUE) != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } - if (!spa_features_check(spa, B_FALSE, - unsup_feat, enabled_feat)) - missing_feat_read = B_TRUE; + if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, + &spa->spa_feat_desc_obj, B_TRUE) != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } - if (spa_writeable(spa) || - spa->spa_load_state == SPA_LOAD_TRYIMPORT) { - if (!spa_features_check(spa, B_TRUE, - unsup_feat, enabled_feat)) { - *missing_feat_writep = B_TRUE; - } - } + enabled_feat = fnvlist_alloc(); + unsup_feat = fnvlist_alloc(); - fnvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); + if (!spa_features_check(spa, B_FALSE, + unsup_feat, enabled_feat)) + missing_feat_read = B_TRUE; - if (!nvlist_empty(unsup_feat)) { - fnvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); + if (spa_writeable(spa) || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) { + if (!spa_features_check(spa, B_TRUE, + unsup_feat, enabled_feat)) { + *missing_feat_writep = B_TRUE; } + } - fnvlist_free(enabled_feat); - fnvlist_free(unsup_feat); + fnvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); - if (!missing_feat_read) { - fnvlist_add_boolean(spa->spa_load_info, - ZPOOL_CONFIG_CAN_RDONLY); - } + if (!nvlist_empty(unsup_feat)) { + fnvlist_add_nvlist(spa->spa_load_info, + ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); + } - /* - * If the state is SPA_LOAD_TRYIMPORT, our objective is - * twofold: to determine whether the pool is available for - * import in read-write mode and (if it is not) whether the - * pool is available for import in read-only mode. If the pool - * is available for import in read-write mode, it is displayed - * as available in userland; if it is not available for import - * in read-only mode, it is displayed as unavailable in - * userland. 
If the pool is available for import in read-only - * mode but not read-write mode, it is displayed as unavailable - * in userland with a special note that the pool is actually - * available for open in read-only mode. - * - * As a result, if the state is SPA_LOAD_TRYIMPORT and we are - * missing a feature for write, we must first determine whether - * the pool can be opened read-only before returning to - * userland in order to know whether to display the - * abovementioned note. - */ - if (missing_feat_read || (*missing_feat_writep && - spa_writeable(spa))) { - spa_load_failed(spa, "pool uses unsupported features"); - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); - } + fnvlist_free(enabled_feat); + fnvlist_free(unsup_feat); - /* - * Load refcounts for ZFS features from disk into an in-memory - * cache during SPA initialization. - */ - for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { - uint64_t refcount; - - error = feature_get_refcount_from_disk(spa, - &spa_feature_table[i], &refcount); - if (error == 0) { - spa->spa_feat_refcount_cache[i] = refcount; - } else if (error == ENOTSUP) { - spa->spa_feat_refcount_cache[i] = - SPA_FEATURE_DISABLED; - } else { - spa_load_failed(spa, "error getting refcount " - "for feature %s [error=%d]", - spa_feature_table[i].fi_guid, error); - return (spa_vdev_err(rvd, - VDEV_AUX_CORRUPT_DATA, EIO)); - } - } + if (!missing_feat_read) { + fnvlist_add_boolean(spa->spa_load_info, + ZPOOL_CONFIG_CAN_RDONLY); } - if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { - if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, - &spa->spa_feat_enabled_txg_obj, B_TRUE) != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* + * If the state is SPA_LOAD_TRYIMPORT, our objective is + * twofold: to determine whether the pool is available for + * import in read-write mode and (if it is not) whether the + * pool is available for import in read-only mode. If the pool + * is available for import in read-write mode, it is displayed + * as available in userland; if it is not available for import + * in read-only mode, it is displayed as unavailable in + * userland. If the pool is available for import in read-only + * mode but not read-write mode, it is displayed as unavailable + * in userland with a special note that the pool is actually + * available for open in read-only mode. + * + * As a result, if the state is SPA_LOAD_TRYIMPORT and we are + * missing a feature for write, we must first determine whether + * the pool can be opened read-only before returning to + * userland in order to know whether to display the + * abovementioned note. + */ + if (missing_feat_read || (*missing_feat_writep && + spa_writeable(spa))) { + spa_load_failed(spa, "pool uses unsupported features"); + return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, + ENOTSUP)); } /* - * Encryption was added before bookmark_v2, even though bookmark_v2 - * is now a dependency. If this pool has encryption enabled without - * bookmark_v2, trigger an errata message. + * Load refcounts for ZFS features from disk into an in-memory + * cache during SPA initialization. 
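+ *
+ * (Sketch of the outcome: spa->spa_feat_refcount_cache[i] holds the
+ * on-disk refcount for feature i, or SPA_FEATURE_DISABLED when
+ * feature_get_refcount_from_disk() returns ENOTSUP because the
+ * feature is not enabled on disk.)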
*/ - if (spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) && - !spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) { - spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; + for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { + uint64_t refcount; + + error = feature_get_refcount_from_disk(spa, + &spa_feature_table[i], &refcount); + if (error == 0) { + spa->spa_feat_refcount_cache[i] = refcount; + } else if (error == ENOTSUP) { + spa->spa_feat_refcount_cache[i] = + SPA_FEATURE_DISABLED; + } else { + spa_load_failed(spa, "error getting refcount " + "for feature %s [error=%d]", + spa_feature_table[i].fi_guid, error); + return (spa_vdev_err(rvd, + VDEV_AUX_CORRUPT_DATA, EIO)); + } } +} - return (0); +if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { + if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, + &spa->spa_feat_enabled_txg_obj, B_TRUE) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} + +/* + * Encryption was added before bookmark_v2, even though bookmark_v2 + * is now a dependency. If this pool has encryption enabled without + * bookmark_v2, trigger an errata message. + */ +if (spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) && + !spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) { + spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; +} + +return (0); } static int spa_ld_load_special_directories(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - spa->spa_is_initializing = B_TRUE; - error = dsl_pool_open(spa->spa_dsl_pool); - spa->spa_is_initializing = B_FALSE; - if (error != 0) { - spa_load_failed(spa, "dsl_pool_open failed [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +spa->spa_is_initializing = B_TRUE; +error = dsl_pool_open(spa->spa_dsl_pool); +spa->spa_is_initializing = B_FALSE; +if (error != 0) { + spa_load_failed(spa, "dsl_pool_open failed [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - return (0); +return (0); } static int spa_ld_get_props(spa_t *spa) { - int error = 0; - uint64_t obj; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +uint64_t obj; +vdev_t *rvd = spa->spa_root_vdev; + +/* Grab the checksum salt from the MOS. */ +error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_CHECKSUM_SALT, 1, + sizeof (spa->spa_cksum_salt.zcs_bytes), + spa->spa_cksum_salt.zcs_bytes); +if (error == ENOENT) { + /* Generate a new salt for subsequent use */ + (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, + sizeof (spa->spa_cksum_salt.zcs_bytes)); +} else if (error != 0) { + spa_load_failed(spa, "unable to retrieve checksum salt from " + "MOS [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - /* Grab the checksum salt from the MOS. 
*/ - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_CHECKSUM_SALT, 1, - sizeof (spa->spa_cksum_salt.zcs_bytes), - spa->spa_cksum_salt.zcs_bytes); - if (error == ENOENT) { - /* Generate a new salt for subsequent use */ - (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, - sizeof (spa->spa_cksum_salt.zcs_bytes)); - } else if (error != 0) { - spa_load_failed(spa, "unable to retrieve checksum salt from " - "MOS [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj, B_TRUE) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); +if (error != 0) { + spa_load_failed(spa, "error opening deferred-frees bpobj " + "[error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj, B_TRUE) != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); - if (error != 0) { - spa_load_failed(spa, "error opening deferred-frees bpobj " - "[error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +/* + * Load the bit that tells us to use the new accounting function + * (raid-z deflation). If we have an older pool, this will not + * be present. + */ +error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - /* - * Load the bit that tells us to use the new accounting function - * (raid-z deflation). If we have an older pool, this will not - * be present. - */ - error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, + &spa->spa_creation_version, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, - &spa->spa_creation_version, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +/* + * Load the persistent error log. If we have an older pool, this will + * not be present. + */ +error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last, + B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - /* - * Load the persistent error log. If we have an older pool, this will - * not be present. - */ - error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last, - B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, + &spa->spa_errlog_scrub, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, - &spa->spa_errlog_scrub, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +/* + * Load the history object. If we have an older pool, this + * will not be present. + */ +error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +/* + * Load the per-vdev ZAP map. 
If we have an older pool, this will not + * be present; in this case, defer its creation to a later time to + * avoid dirtying the MOS this early / out of sync context. See + * spa_sync_config_object. + */ + +/* The sentinel is only available in the MOS config. */ +nvlist_t *mos_config; +if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) { + spa_load_failed(spa, "unable to retrieve MOS config"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} + +error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, + &spa->spa_all_vdev_zaps, B_FALSE); + +if (error == ENOENT) { + VERIFY(!nvlist_exists(mos_config, + ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); + spa->spa_avz_action = AVZ_ACTION_INITIALIZE; + ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); +} else if (error != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { /* - * Load the history object. If we have an older pool, this - * will not be present. + * An older version of ZFS overwrote the sentinel value, so + * we have orphaned per-vdev ZAPs in the MOS. Defer their + * destruction to later; see spa_sync_config_object. */ - error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - + spa->spa_avz_action = AVZ_ACTION_DESTROY; /* - * Load the per-vdev ZAP map. If we have an older pool, this will not - * be present; in this case, defer its creation to a later time to - * avoid dirtying the MOS this early / out of sync context. See - * spa_sync_config_object. + * We're assuming that no vdevs have had their ZAPs created + * before this. Better be sure of it. */ + ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); +} +nvlist_free(mos_config); - /* The sentinel is only available in the MOS config. */ - nvlist_t *mos_config; - if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) { - spa_load_failed(spa, "unable to retrieve MOS config"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, - &spa->spa_all_vdev_zaps, B_FALSE); - - if (error == ENOENT) { - VERIFY(!nvlist_exists(mos_config, - ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); - spa->spa_avz_action = AVZ_ACTION_INITIALIZE; - ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); - } else if (error != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { - /* - * An older version of ZFS overwrote the sentinel value, so - * we have orphaned per-vdev ZAPs in the MOS. Defer their - * destruction to later; see spa_sync_config_object. - */ - spa->spa_avz_action = AVZ_ACTION_DESTROY; - /* - * We're assuming that no vdevs have had their ZAPs created - * before this. Better be sure of it. 
- */ - ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); - } - nvlist_free(mos_config); - - spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); +spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); - error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object, - B_FALSE); - if (error && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object, + B_FALSE); +if (error && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (error == 0) { - uint64_t autoreplace; +if (error == 0) { + uint64_t autoreplace; - spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); - spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); - spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); - spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); - spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); - spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); - spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, - &spa->spa_dedup_ditto); - spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim); - spa->spa_autoreplace = (autoreplace != 0); - } + spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); + spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); + spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); + spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); + spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); + spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, + &spa->spa_dedup_ditto); + spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim); + spa->spa_autoreplace = (autoreplace != 0); +} - /* - * If we are importing a pool with missing top-level vdevs, - * we enforce that the pool doesn't panic or get suspended on - * error since the likelihood of missing data is extremely high. - */ - if (spa->spa_missing_tvds > 0 && - spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE && - spa->spa_load_state != SPA_LOAD_TRYIMPORT) { - spa_load_note(spa, "forcing failmode to 'continue' " - "as some top level vdevs are missing"); - spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE; - } +/* + * If we are importing a pool with missing top-level vdevs, + * we enforce that the pool doesn't panic or get suspended on + * error since the likelihood of missing data is extremely high. + */ +if (spa->spa_missing_tvds > 0 && + spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE && + spa->spa_load_state != SPA_LOAD_TRYIMPORT) { + spa_load_note(spa, "forcing failmode to 'continue' " + "as some top level vdevs are missing"); + spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE; +} - return (0); +return (0); } static int spa_ld_open_aux_vdevs(spa_t *spa, spa_import_type_t type) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * If we're assembling the pool from the split-off vdevs of - * an existing pool, we don't want to attach the spares & cache - * devices. - */ +/* + * If we're assembling the pool from the split-off vdevs of + * an existing pool, we don't want to attach the spares & cache + * devices. + */ - /* - * Load any hot spares for this pool. 
- */ - error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object, - B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { - ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); - if (load_nvlist(spa, spa->spa_spares.sav_object, - &spa->spa_spares.sav_config) != 0) { - spa_load_failed(spa, "error loading spares nvlist"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_spares(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - } else if (error == 0) { - spa->spa_spares.sav_sync = B_TRUE; +/* + * Load any hot spares for this pool. + */ +error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object, + B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); + if (load_nvlist(spa, spa->spa_spares.sav_object, + &spa->spa_spares.sav_config) != 0) { + spa_load_failed(spa, "error loading spares nvlist"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } - /* - * Load any level 2 ARC devices for this pool. - */ - error = spa_dir_prop(spa, DMU_POOL_L2CACHE, - &spa->spa_l2cache.sav_object, B_FALSE); - if (error != 0 && error != ENOENT) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { - ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); - if (load_nvlist(spa, spa->spa_l2cache.sav_object, - &spa->spa_l2cache.sav_config) != 0) { - spa_load_failed(spa, "error loading l2cache nvlist"); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); +} else if (error == 0) { + spa->spa_spares.sav_sync = B_TRUE; +} - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_l2cache(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - } else if (error == 0) { - spa->spa_l2cache.sav_sync = B_TRUE; +/* + * Load any level 2 ARC devices for this pool. + */ +error = spa_dir_prop(spa, DMU_POOL_L2CACHE, + &spa->spa_l2cache.sav_object, B_FALSE); +if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { + ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); + if (load_nvlist(spa, spa->spa_l2cache.sav_object, + &spa->spa_l2cache.sav_config) != 0) { + spa_load_failed(spa, "error loading l2cache nvlist"); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } - return (0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); +} else if (error == 0) { + spa->spa_l2cache.sav_sync = B_TRUE; +} + +return (0); } static int spa_ld_load_vdev_metadata(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * If the 'multihost' property is set, then never allow a pool to - * be imported when the system hostid is zero. The exception to - * this rule is zdb which is always allowed to access pools. 
- */ - if (spa_multihost(spa) && spa_get_hostid() == 0 && - (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { - fnvlist_add_uint64(spa->spa_load_info, - ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); - return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); - } +/* + * If the 'multihost' property is set, then never allow a pool to + * be imported when the system hostid is zero. The exception to + * this rule is zdb which is always allowed to access pools. + */ +if (spa_multihost(spa) && spa_get_hostid(spa) == 0 && + (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); + return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); +} +/* + * If the 'autoreplace' property is set, then post a resource notifying + * the ZFS DE that it should not issue any faults for unopenable + * devices. We also iterate over the vdevs, and post a sysevent for any + * unopenable vdevs so that the normal autoreplace handler can take + * over. + */ +if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) { + spa_check_removed(spa->spa_root_vdev); /* - * If the 'autoreplace' property is set, then post a resource notifying - * the ZFS DE that it should not issue any faults for unopenable - * devices. We also iterate over the vdevs, and post a sysevent for any - * unopenable vdevs so that the normal autoreplace handler can take - * over. + * For the import case, this is done in spa_import(), because + * at this point we're using the spare definitions from + * the MOS config, not necessarily from the userland config. */ - if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) { - spa_check_removed(spa->spa_root_vdev); - /* - * For the import case, this is done in spa_import(), because - * at this point we're using the spare definitions from - * the MOS config, not necessarily from the userland config. - */ - if (spa->spa_load_state != SPA_LOAD_IMPORT) { - spa_aux_check_removed(&spa->spa_spares); - spa_aux_check_removed(&spa->spa_l2cache); - } + if (spa->spa_load_state != SPA_LOAD_IMPORT) { + spa_aux_check_removed(&spa->spa_spares); + spa_aux_check_removed(&spa->spa_l2cache); } +} - /* - * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc. - */ - error = vdev_load(rvd); - if (error != 0) { - spa_load_failed(spa, "vdev_load failed [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); - } +/* + * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc. + */ +error = vdev_load(rvd); +if (error != 0) { + spa_load_failed(spa, "vdev_load failed [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); +} - /* - * Propagate the leaf DTLs we just loaded all the way up the vdev tree. - */ - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - vdev_dtl_reassess(rvd, 0, 0, B_FALSE); - spa_config_exit(spa, SCL_ALL, FTAG); +/* + * Propagate the leaf DTLs we just loaded all the way up the vdev tree. 
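+ * A DTL (dirty time log) records the txg ranges during which a
+ * vdev may have missed writes; rolling the leaf DTLs up lets the
+ * interior vdevs report whether a resilver is still required.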
+ */ +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +vdev_dtl_reassess(rvd, 0, 0, B_FALSE); +spa_config_exit(spa, SCL_ALL, FTAG); - return (0); +return (0); } static int spa_ld_load_dedup_tables(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - error = ddt_load(spa); - if (error != 0) { - spa_load_failed(spa, "ddt_load failed [error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } +error = ddt_load(spa); +if (error != 0) { + spa_load_failed(spa, "ddt_load failed [error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); +} - return (0); +return (0); } static int spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, char **ereport) { - vdev_t *rvd = spa->spa_root_vdev; +vdev_t *rvd = spa->spa_root_vdev; - if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa)) { - boolean_t missing = spa_check_logs(spa); - if (missing) { - if (spa->spa_missing_tvds != 0) { - spa_load_note(spa, "spa_check_logs failed " - "so dropping the logs"); - } else { - *ereport = FM_EREPORT_ZFS_LOG_REPLAY; - spa_load_failed(spa, "spa_check_logs failed"); - return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, - ENXIO)); - } +if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa)) { + boolean_t missing = spa_check_logs(spa); + if (missing) { + if (spa->spa_missing_tvds != 0) { + spa_load_note(spa, "spa_check_logs failed " + "so dropping the logs"); + } else { + *ereport = FM_EREPORT_ZFS_LOG_REPLAY; + spa_load_failed(spa, "spa_check_logs failed"); + return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, + ENXIO)); } } +} - return (0); +return (0); } static int spa_ld_verify_pool_data(spa_t *spa) { - int error = 0; - vdev_t *rvd = spa->spa_root_vdev; +int error = 0; +vdev_t *rvd = spa->spa_root_vdev; - /* - * We've successfully opened the pool, verify that we're ready - * to start pushing transactions. - */ - if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) { - error = spa_load_verify(spa); - if (error != 0) { - spa_load_failed(spa, "spa_load_verify failed " - "[error=%d]", error); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - error)); - } +/* + * We've successfully opened the pool, verify that we're ready + * to start pushing transactions. + */ +if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) { + error = spa_load_verify(spa); + if (error != 0) { + spa_load_failed(spa, "spa_load_verify failed " + "[error=%d]", error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, + error)); } +} - return (0); +return (0); } static void spa_ld_claim_log_blocks(spa_t *spa) { - dmu_tx_t *tx; - dsl_pool_t *dp = spa_get_dsl(spa); +dmu_tx_t *tx; +dsl_pool_t *dp = spa_get_dsl(spa); - /* - * Claim log blocks that haven't been committed yet. - * This must all happen in a single txg. - * Note: spa_claim_max_txg is updated by spa_claim_notify(), - * invoked from zil_claim_log_block()'s i/o done callback. - * Price of rollback is that we abandon the log. - */ - spa->spa_claiming = B_TRUE; +/* + * Claim log blocks that haven't been committed yet. + * This must all happen in a single txg. + * Note: spa_claim_max_txg is updated by spa_claim_notify(), + * invoked from zil_claim_log_block()'s i/o done callback. + * Price of rollback is that we abandon the log. 
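+ * The dmu_objset_find_dp() walk below invokes zil_claim() on every
+ * dataset, so that allocated-but-uncommitted log blocks are claimed
+ * before the first sync could otherwise reuse them.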
+ */ +spa->spa_claiming = B_TRUE; - tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); - (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, - zil_claim, tx, DS_FIND_CHILDREN); - dmu_tx_commit(tx); +tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); +(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_claim, tx, DS_FIND_CHILDREN); +dmu_tx_commit(tx); - spa->spa_claiming = B_FALSE; +spa->spa_claiming = B_FALSE; - spa_set_log_state(spa, SPA_LOG_GOOD); +spa_set_log_state(spa, SPA_LOG_GOOD); } static void spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg, - boolean_t update_config_cache) +boolean_t update_config_cache) { - vdev_t *rvd = spa->spa_root_vdev; - int need_update = B_FALSE; +vdev_t *rvd = spa->spa_root_vdev; +int need_update = B_FALSE; - /* - * If the config cache is stale, or we have uninitialized - * metaslabs (see spa_vdev_add()), then update the config. - * - * If this is a verbatim import, trust the current - * in-core spa_config and update the disk labels. - */ - if (update_config_cache || config_cache_txg != spa->spa_config_txg || - spa->spa_load_state == SPA_LOAD_IMPORT || - spa->spa_load_state == SPA_LOAD_RECOVER || - (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) +/* + * If the config cache is stale, or we have uninitialized + * metaslabs (see spa_vdev_add()), then update the config. + * + * If this is a verbatim import, trust the current + * in-core spa_config and update the disk labels. + */ +if (update_config_cache || config_cache_txg != spa->spa_config_txg || + spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_RECOVER || + (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) + need_update = B_TRUE; + +for (int c = 0; c < rvd->vdev_children; c++) + if (rvd->vdev_child[c]->vdev_ms_array == 0) need_update = B_TRUE; - for (int c = 0; c < rvd->vdev_children; c++) - if (rvd->vdev_child[c]->vdev_ms_array == 0) - need_update = B_TRUE; - - /* - * Update the config cache asychronously in case we're the - * root pool, in which case the config cache isn't writable yet. - */ - if (need_update) - spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); +/* + * Update the config cache asynchronously in case we're the + * root pool, in which case the config cache isn't writable yet. + */ +if (need_update) + spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } static void spa_ld_prepare_for_reload(spa_t *spa) { - int mode = spa->spa_mode; - int async_suspended = spa->spa_async_suspended; +int mode = spa->spa_mode; +int async_suspended = spa->spa_async_suspended; - spa_unload(spa); - spa_deactivate(spa); - spa_activate(spa, mode); +spa_unload(spa); +spa_deactivate(spa); +spa_activate(spa, mode); - /* - * We save the value of spa_async_suspended as it gets reset to 0 by - * spa_unload(). We want to restore it back to the original value before - * returning as we might be calling spa_async_resume() later. - */ - spa->spa_async_suspended = async_suspended; +/* + * We save the value of spa_async_suspended as it gets reset to 0 by + * spa_unload(). We want to restore it back to the original value before + * returning as we might be calling spa_async_resume() later. 
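+ * Note that spa_async_suspended is a count of outstanding
+ * spa_async_suspend() calls, not a flag, so simply suspending
+ * again here would not be equivalent.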
+ */ +spa->spa_async_suspended = async_suspended; } static int spa_ld_read_checkpoint_txg(spa_t *spa) { - uberblock_t checkpoint; - int error = 0; +uberblock_t checkpoint; +int error = 0; - ASSERT0(spa->spa_checkpoint_txg); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT0(spa->spa_checkpoint_txg); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), - sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); +error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), + sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); - if (error == ENOENT) - return (0); +if (error == ENOENT) + return (0); - if (error != 0) - return (error); +if (error != 0) + return (error); - ASSERT3U(checkpoint.ub_txg, !=, 0); - ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0); - ASSERT3U(checkpoint.ub_timestamp, !=, 0); - spa->spa_checkpoint_txg = checkpoint.ub_txg; - spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; +ASSERT3U(checkpoint.ub_txg, !=, 0); +ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0); +ASSERT3U(checkpoint.ub_timestamp, !=, 0); +spa->spa_checkpoint_txg = checkpoint.ub_txg; +spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; - return (0); +return (0); } static int spa_ld_mos_init(spa_t *spa, spa_import_type_t type) { - int error = 0; - - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); +int error = 0; - /* - * Never trust the config that is provided unless we are assembling - * a pool following a split. - * This means don't trust blkptrs and the vdev tree in general. This - * also effectively puts the spa in read-only mode since - * spa_writeable() checks for spa_trust_config to be true. - * We will later load a trusted config from the MOS. - */ - if (type != SPA_IMPORT_ASSEMBLE) - spa->spa_trust_config = B_FALSE; +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); - /* - * Parse the config provided to create a vdev tree. - */ - error = spa_ld_parse_config(spa, type); - if (error != 0) - return (error); +/* + * Never trust the config that is provided unless we are assembling + * a pool following a split. + * This means don't trust blkptrs and the vdev tree in general. This + * also effectively puts the spa in read-only mode since + * spa_writeable() checks for spa_trust_config to be true. + * We will later load a trusted config from the MOS. + */ +if (type != SPA_IMPORT_ASSEMBLE) + spa->spa_trust_config = B_FALSE; - spa_import_progress_add(spa); +/* + * Parse the config provided to create a vdev tree. + */ +error = spa_ld_parse_config(spa, type); +if (error != 0) + return (error); - /* - * Now that we have the vdev tree, try to open each vdev. This involves - * opening the underlying physical device, retrieving its geometry and - * probing the vdev with a dummy I/O. The state of each vdev will be set - * based on the success of those operations. After this we'll be ready - * to read from the vdevs. - */ - error = spa_ld_open_vdevs(spa); - if (error != 0) - return (error); +spa_import_progress_add(spa); - /* - * Read the label of each vdev and make sure that the GUIDs stored - * there match the GUIDs in the config provided. - * If we're assembling a new pool that's been split off from an - * existing pool, the labels haven't yet been updated so we skip - * validation for now. 
- */ - if (type != SPA_IMPORT_ASSEMBLE) { - error = spa_ld_validate_vdevs(spa); - if (error != 0) - return (error); - } +/* + * Now that we have the vdev tree, try to open each vdev. This involves + * opening the underlying physical device, retrieving its geometry and + * probing the vdev with a dummy I/O. The state of each vdev will be set + * based on the success of those operations. After this we'll be ready + * to read from the vdevs. + */ +error = spa_ld_open_vdevs(spa); +if (error != 0) + return (error); - /* - * Read all vdev labels to find the best uberblock (i.e. latest, - * unless spa_load_max_txg is set) and store it in spa_uberblock. We - * get the list of features required to read blkptrs in the MOS from - * the vdev label with the best uberblock and verify that our version - * of zfs supports them all. - */ - error = spa_ld_select_uberblock(spa, type); +/* + * Read the label of each vdev and make sure that the GUIDs stored + * there match the GUIDs in the config provided. + * If we're assembling a new pool that's been split off from an + * existing pool, the labels haven't yet been updated so we skip + * validation for now. + */ +if (type != SPA_IMPORT_ASSEMBLE) { + error = spa_ld_validate_vdevs(spa); if (error != 0) return (error); +} - /* - * Pass that uberblock to the dsl_pool layer which will open the root - * blkptr. This blkptr points to the latest version of the MOS and will - * allow us to read its contents. - */ - error = spa_ld_open_rootbp(spa); - if (error != 0) - return (error); +/* + * Read all vdev labels to find the best uberblock (i.e. latest, + * unless spa_load_max_txg is set) and store it in spa_uberblock. We + * get the list of features required to read blkptrs in the MOS from + * the vdev label with the best uberblock and verify that our version + * of zfs supports them all. + */ +error = spa_ld_select_uberblock(spa, type); +if (error != 0) + return (error); - return (0); +/* + * Pass that uberblock to the dsl_pool layer which will open the root + * blkptr. This blkptr points to the latest version of the MOS and will + * allow us to read its contents. 
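+ * The uberblock's ub_rootbp is effectively the root of the entire
+ * pool: every other on-disk structure is reachable from the MOS
+ * it points to.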
+ */ +error = spa_ld_open_rootbp(spa); +if (error != 0) + return (error); + +return (0); } static int spa_ld_checkpoint_rewind(spa_t *spa) { - uberblock_t checkpoint; - int error = 0; +uberblock_t checkpoint; +int error = 0; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), - sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); +error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), + sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); - if (error != 0) { - spa_load_failed(spa, "unable to retrieve checkpointed " - "uberblock from the MOS config [error=%d]", error); +if (error != 0) { + spa_load_failed(spa, "unable to retrieve checkpointed " + "uberblock from the MOS config [error=%d]", error); - if (error == ENOENT) - error = ZFS_ERR_NO_CHECKPOINT; + if (error == ENOENT) + error = ZFS_ERR_NO_CHECKPOINT; - return (error); - } + return (error); +} - ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg); - ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg); +ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg); +ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg); - /* - * We need to update the txg and timestamp of the checkpointed - * uberblock to be higher than the latest one. This ensures that - * the checkpointed uberblock is selected if we were to close and - * reopen the pool right after we've written it in the vdev labels. - * (also see block comment in vdev_uberblock_compare) - */ - checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1; - checkpoint.ub_timestamp = gethrestime_sec(); +/* + * We need to update the txg and timestamp of the checkpointed + * uberblock to be higher than the latest one. This ensures that + * the checkpointed uberblock is selected if we were to close and + * reopen the pool right after we've written it in the vdev labels. + * (also see block comment in vdev_uberblock_compare) + */ +checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1; +checkpoint.ub_timestamp = gethrestime_sec(); - /* - * Set current uberblock to be the checkpointed uberblock. - */ - spa->spa_uberblock = checkpoint; +/* + * Set current uberblock to be the checkpointed uberblock. + */ +spa->spa_uberblock = checkpoint; - /* - * If we are doing a normal rewind, then the pool is open for - * writing and we sync the "updated" checkpointed uberblock to - * disk. Once this is done, we've basically rewound the whole - * pool and there is no way back. - * - * There are cases when we don't want to attempt and sync the - * checkpointed uberblock to disk because we are opening a - * pool as read-only. Specifically, verifying the checkpointed - * state with zdb, and importing the checkpointed state to get - * a "preview" of its content. - */ - if (spa_writeable(spa)) { - vdev_t *rvd = spa->spa_root_vdev; +/* + * If we are doing a normal rewind, then the pool is open for + * writing and we sync the "updated" checkpointed uberblock to + * disk. Once this is done, we've basically rewound the whole + * pool and there is no way back. + * + * There are cases when we don't want to attempt and sync the + * checkpointed uberblock to disk because we are opening a + * pool as read-only. 
Specifically, verifying the checkpointed + * state with zdb, and importing the checkpointed state to get + * a "preview" of its content. + */ +if (spa_writeable(spa)) { + vdev_t *rvd = spa->spa_root_vdev; - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; - int svdcount = 0; - int children = rvd->vdev_children; - int c0 = spa_get_random(children); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; + int svdcount = 0; + int children = rvd->vdev_children; + int c0 = spa_get_random(children); - for (int c = 0; c < children; c++) { - vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; + for (int c = 0; c < children; c++) { + vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; - /* Stop when revisiting the first vdev */ - if (c > 0 && svd[0] == vd) - break; + /* Stop when revisiting the first vdev */ + if (c > 0 && svd[0] == vd) + break; - if (vd->vdev_ms_array == 0 || vd->vdev_islog || - !vdev_is_concrete(vd)) - continue; + if (vd->vdev_ms_array == 0 || vd->vdev_islog || + !vdev_is_concrete(vd)) + continue; - svd[svdcount++] = vd; - if (svdcount == SPA_SYNC_MIN_VDEVS) - break; - } - error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); - if (error == 0) - spa->spa_last_synced_guid = rvd->vdev_guid; - spa_config_exit(spa, SCL_ALL, FTAG); + svd[svdcount++] = vd; + if (svdcount == SPA_SYNC_MIN_VDEVS) + break; + } + error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); + if (error == 0) + spa->spa_last_synced_guid = rvd->vdev_guid; + spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0) { - spa_load_failed(spa, "failed to write checkpointed " - "uberblock to the vdev labels [error=%d]", error); - return (error); - } + if (error != 0) { + spa_load_failed(spa, "failed to write checkpointed " + "uberblock to the vdev labels [error=%d]", error); + return (error); } +} - return (0); +return (0); } static int spa_ld_mos_with_trusted_config(spa_t *spa, spa_import_type_t type, - boolean_t *update_config_cache) +boolean_t *update_config_cache) { - int error; +int error; + +/* + * Parse the config for pool, open and validate vdevs, + * select an uberblock, and use that uberblock to open + * the MOS. + */ +error = spa_ld_mos_init(spa, type); +if (error != 0) + return (error); + +/* + * Retrieve the trusted config stored in the MOS and use it to create + * a new, exact version of the vdev tree, then reopen all vdevs. + */ +error = spa_ld_trusted_config(spa, type, B_FALSE); +if (error == EAGAIN) { + if (update_config_cache != NULL) + *update_config_cache = B_TRUE; /* - * Parse the config for pool, open and validate vdevs, - * select an uberblock, and use that uberblock to open - * the MOS. + * Redo the loading process with the trusted config if it is + * too different from the untrusted config. */ + spa_ld_prepare_for_reload(spa); + spa_load_note(spa, "RELOADING"); error = spa_ld_mos_init(spa, type); if (error != 0) return (error); - /* - * Retrieve the trusted config stored in the MOS and use it to create - * a new, exact version of the vdev tree, then reopen all vdevs. - */ - error = spa_ld_trusted_config(spa, type, B_FALSE); - if (error == EAGAIN) { - if (update_config_cache != NULL) - *update_config_cache = B_TRUE; - - /* - * Redo the loading process with the trusted config if it is - * too different from the untrusted config. 
- */ - spa_ld_prepare_for_reload(spa); - spa_load_note(spa, "RELOADING"); - error = spa_ld_mos_init(spa, type); - if (error != 0) - return (error); - - error = spa_ld_trusted_config(spa, type, B_TRUE); - if (error != 0) - return (error); - - } else if (error != 0) { + error = spa_ld_trusted_config(spa, type, B_TRUE); + if (error != 0) return (error); - } - return (0); +} else if (error != 0) { + return (error); +} + +return (0); } /* - * Load an existing storage pool, using the config provided. This config - * describes which vdevs are part of the pool and is later validated against - * partial configs present in each vdev's label and an entire copy of the - * config stored in the MOS. - */ +* Load an existing storage pool, using the config provided. This config +* describes which vdevs are part of the pool and is later validated against +* partial configs present in each vdev's label and an entire copy of the +* config stored in the MOS. +*/ static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) { - int error = 0; - boolean_t missing_feat_write = B_FALSE; - boolean_t checkpoint_rewind = - (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); - boolean_t update_config_cache = B_FALSE; +int error = 0; +boolean_t missing_feat_write = B_FALSE; +boolean_t checkpoint_rewind = + (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); +boolean_t update_config_cache = B_FALSE; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); - - spa_load_note(spa, "LOADING"); - - error = spa_ld_mos_with_trusted_config(spa, type, &update_config_cache); - if (error != 0) - return (error); - - /* - * If we are rewinding to the checkpoint then we need to repeat - * everything we've done so far in this function but this time - * selecting the checkpointed uberblock and using that to open - * the MOS. - */ - if (checkpoint_rewind) { - /* - * If we are rewinding to the checkpoint update config cache - * anyway. - */ - update_config_cache = B_TRUE; +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); - /* - * Extract the checkpointed uberblock from the current MOS - * and use this as the pool's uberblock from now on. If the - * pool is imported as writeable we also write the checkpoint - * uberblock to the labels, making the rewind permanent. - */ - error = spa_ld_checkpoint_rewind(spa); - if (error != 0) - return (error); +spa_load_note(spa, "LOADING"); - /* - * Redo the loading process process again with the - * checkpointed uberblock. - */ - spa_ld_prepare_for_reload(spa); - spa_load_note(spa, "LOADING checkpointed uberblock"); - error = spa_ld_mos_with_trusted_config(spa, type, NULL); - if (error != 0) - return (error); - } +error = spa_ld_mos_with_trusted_config(spa, type, &update_config_cache); +if (error != 0) + return (error); +/* + * If we are rewinding to the checkpoint then we need to repeat + * everything we've done so far in this function but this time + * selecting the checkpointed uberblock and using that to open + * the MOS. + */ +if (checkpoint_rewind) { /* - * Retrieve the checkpoint txg if the pool has a checkpoint. + * If we are rewinding to the checkpoint update config cache + * anyway. */ - error = spa_ld_read_checkpoint_txg(spa); - if (error != 0) - return (error); + update_config_cache = B_TRUE; /* - * Retrieve the mapping of indirect vdevs. Those vdevs were removed - * from the pool and their contents were re-mapped to other vdevs. 
Note - * that everything that we read before this step must have been - * rewritten on concrete vdevs after the last device removal was - * initiated. Otherwise we could be reading from indirect vdevs before - * we have loaded their mappings. + * Extract the checkpointed uberblock from the current MOS + * and use this as the pool's uberblock from now on. If the + * pool is imported as writeable we also write the checkpoint + * uberblock to the labels, making the rewind permanent. */ - error = spa_ld_open_indirect_vdev_metadata(spa); + error = spa_ld_checkpoint_rewind(spa); if (error != 0) return (error); /* - * Retrieve the full list of active features from the MOS and check if - * they are all supported. + * Redo the loading process again with the + * checkpointed uberblock. */ - error = spa_ld_check_features(spa, &missing_feat_write); + spa_ld_prepare_for_reload(spa); + spa_load_note(spa, "LOADING checkpointed uberblock"); + error = spa_ld_mos_with_trusted_config(spa, type, NULL); if (error != 0) return (error); +} + +/* + * Retrieve the checkpoint txg if the pool has a checkpoint. + */ +error = spa_ld_read_checkpoint_txg(spa); +if (error != 0) + return (error); + +/* + * Retrieve the mapping of indirect vdevs. Those vdevs were removed + * from the pool and their contents were re-mapped to other vdevs. Note + * that everything that we read before this step must have been + * rewritten on concrete vdevs after the last device removal was + * initiated. Otherwise we could be reading from indirect vdevs before + * we have loaded their mappings. + */ +error = spa_ld_open_indirect_vdev_metadata(spa); +if (error != 0) + return (error); + +/* + * Retrieve the full list of active features from the MOS and check if + * they are all supported. + */ +error = spa_ld_check_features(spa, &missing_feat_write); +if (error != 0) + return (error); + +/* + * Load several special directories from the MOS needed by the dsl_pool + * layer. + */ +error = spa_ld_load_special_directories(spa); +if (error != 0) + return (error); + +/* + * Retrieve pool properties from the MOS. + */ +error = spa_ld_get_props(spa); +if (error != 0) + return (error); + +/* + * Retrieve the list of auxiliary devices - cache devices and spares - + * and open them. + */ +error = spa_ld_open_aux_vdevs(spa, type); +if (error != 0) + return (error); + +/* + * Load the metadata for all vdevs. Also check if unopenable devices + * should be autoreplaced. + */ +error = spa_ld_load_vdev_metadata(spa); +if (error != 0) + return (error); + +error = spa_ld_load_dedup_tables(spa); +if (error != 0) + return (error); + +/* + * Verify the logs now to make sure we don't have any unexpected errors + * when we claim log blocks later. + */ +error = spa_ld_verify_logs(spa, type, ereport); +if (error != 0) + return (error); + +if (missing_feat_write) { + ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT); /* - * Load several special directories from the MOS needed by the dsl_pool - * layer. + * At this point, we know that we can open the pool in + * read-only mode but not read-write mode. We now have enough + * information and can return to userland. */ - error = spa_ld_load_special_directories(spa); - if (error != 0) - return (error); + return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT, + ENOTSUP)); +} + +/* + * Traverse the last txgs to make sure the pool was left off in a safe + * state. When performing an extreme rewind, we verify the whole pool, + * which can take a very long time. 
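+ * How much spa_load_verify() actually traverses is controlled by
+ * the spa_load_verify_metadata and spa_load_verify_data module
+ * parameters.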
+ */ +error = spa_ld_verify_pool_data(spa); +if (error != 0) + return (error); + +/* + * Calculate the deflated space for the pool. This must be done before + * we write anything to the pool because we'd need to update the space + * accounting using the deflated sizes. + */ +spa_update_dspace(spa); + +/* + * We have now retrieved all the information we needed to open the + * pool. If we are importing the pool in read-write mode, a few + * additional steps must be performed to finish the import. + */ +if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || + spa->spa_load_max_txg == UINT64_MAX)) { + uint64_t config_cache_txg = spa->spa_config_txg; + + ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT); /* - * Retrieve pool properties from the MOS. + * In case of a checkpoint rewind, log the original txg + * of the checkpointed uberblock. */ - error = spa_ld_get_props(spa); - if (error != 0) - return (error); + if (checkpoint_rewind) { + spa_history_log_internal(spa, "checkpoint rewind", + NULL, "rewound state to txg=%llu", + (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); + } /* - * Retrieve the list of auxiliary devices - cache devices and spares - - * and open them. + * Traverse the ZIL and claim all blocks. */ - error = spa_ld_open_aux_vdevs(spa, type); - if (error != 0) - return (error); + spa_ld_claim_log_blocks(spa); /* - * Load the metadata for all vdevs. Also check if unopenable devices - * should be autoreplaced. + * Kick-off the syncing thread. */ - error = spa_ld_load_vdev_metadata(spa); - if (error != 0) - return (error); - - error = spa_ld_load_dedup_tables(spa); - if (error != 0) - return (error); + spa->spa_sync_on = B_TRUE; + txg_sync_start(spa->spa_dsl_pool); + mmp_thread_start(spa); /* - * Verify the logs now to make sure we don't have any unexpected errors - * when we claim log blocks later. + * Wait for all claims to sync. We sync up to the highest + * claimed log block birth time so that claimed log blocks + * don't appear to be from the future. spa_claim_max_txg + * will have been set for us by ZIL traversal operations + * performed above. */ - error = spa_ld_verify_logs(spa, type, ereport); - if (error != 0) - return (error); - - if (missing_feat_write) { - ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT); - - /* - * At this point, we know that we can open the pool in - * read-only mode but not read-write mode. We now have enough - * information and can return to userland. - */ - return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); - } + txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* - * Traverse the last txgs to make sure the pool was left off in a safe - * state. When performing an extreme rewind, we verify the whole pool, - * which can take a very long time. + * Check if we need to request an update of the config. On the + * next sync, we would update the config stored in vdev labels + * and the cachefile (by default /etc/zfs/zpool.cache). */ - error = spa_ld_verify_pool_data(spa); - if (error != 0) - return (error); + spa_ld_check_for_config_update(spa, config_cache_txg, + update_config_cache); /* - * Calculate the deflated space for the pool. This must be done before - * we write anything to the pool because we'd need to update the space - * accounting using the deflated sizes. + * Check all DTLs to see if anything needs resilvering. 
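+	 * A needed resilver is only requested here; the async thread
+	 * starts it once the load has finished.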
*/ - spa_update_dspace(spa); + if (!dsl_scan_resilvering(spa->spa_dsl_pool) && + vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) + spa_async_request(spa, SPA_ASYNC_RESILVER); /* - * We have now retrieved all the information we needed to open the - * pool. If we are importing the pool in read-write mode, a few - * additional steps must be performed to finish the import. + * Log the fact that we booted up (so that we can detect if + * we rebooted in the middle of an operation). */ - if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || - spa->spa_load_max_txg == UINT64_MAX)) { - uint64_t config_cache_txg = spa->spa_config_txg; - - ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT); - - /* - * In case of a checkpoint rewind, log the original txg - * of the checkpointed uberblock. - */ - if (checkpoint_rewind) { - spa_history_log_internal(spa, "checkpoint rewind", - NULL, "rewound state to txg=%llu", - (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); - } - - /* - * Traverse the ZIL and claim all blocks. - */ - spa_ld_claim_log_blocks(spa); - - /* - * Kick-off the syncing thread. - */ - spa->spa_sync_on = B_TRUE; - txg_sync_start(spa->spa_dsl_pool); - mmp_thread_start(spa); - - /* - * Wait for all claims to sync. We sync up to the highest - * claimed log block birth time so that claimed log blocks - * don't appear to be from the future. spa_claim_max_txg - * will have been set for us by ZIL traversal operations - * performed above. - */ - txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); - - /* - * Check if we need to request an update of the config. On the - * next sync, we would update the config stored in vdev labels - * and the cachefile (by default /etc/zfs/zpool.cache). - */ - spa_ld_check_for_config_update(spa, config_cache_txg, - update_config_cache); - - /* - * Check all DTLs to see if anything needs resilvering. - */ - if (!dsl_scan_resilvering(spa->spa_dsl_pool) && - vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - spa_async_request(spa, SPA_ASYNC_RESILVER); - - /* - * Log the fact that we booted up (so that we can detect if - * we rebooted in the middle of an operation). - */ - spa_history_log_version(spa, "open", NULL); + spa_history_log_version(spa, "open", NULL); - spa_restart_removal(spa); - spa_spawn_aux_threads(spa); + spa_restart_removal(spa); + spa_spawn_aux_threads(spa); - /* - * Delete any inconsistent datasets. - * - * Note: - * Since we may be issuing deletes for clones here, - * we make sure to do so after we've spawned all the - * auxiliary threads above (from which the livelist - * deletion zthr is part of). - */ - (void) dmu_objset_find(spa_name(spa), - dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); + /* + * Delete any inconsistent datasets. + * + * Note: + * Since we may be issuing deletes for clones here, + * we make sure to do so after we've spawned all the + * auxiliary threads above (from which the livelist + * deletion zthr is part of). + */ + (void) dmu_objset_find(spa_name(spa), + dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); - /* - * Clean up any stale temporary dataset userrefs. - */ - dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); + /* + * Clean up any stale temporary dataset userrefs. 
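+	 * Temporary snapshot holds should not survive an export or a
+	 * crash, so any found at this point are stale by definition.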
+ */ + dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - vdev_initialize_restart(spa->spa_root_vdev); - vdev_trim_restart(spa->spa_root_vdev); - vdev_autotrim_restart(spa); - spa_config_exit(spa, SCL_CONFIG, FTAG); - } + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + vdev_initialize_restart(spa->spa_root_vdev); + vdev_trim_restart(spa->spa_root_vdev); + vdev_autotrim_restart(spa); + spa_config_exit(spa, SCL_CONFIG, FTAG); +} - spa_import_progress_remove(spa_guid(spa)); - spa_load_note(spa, "LOADED"); +spa_import_progress_remove(spa_guid(spa)); +spa_load_note(spa, "LOADED"); - return (0); +return (0); } static int spa_load_retry(spa_t *spa, spa_load_state_t state) { - int mode = spa->spa_mode; +int mode = spa->spa_mode; - spa_unload(spa); - spa_deactivate(spa); +spa_unload(spa); +spa_deactivate(spa); - spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; +spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; - spa_activate(spa, mode); - spa_async_suspend(spa); +spa_activate(spa, mode); +spa_async_suspend(spa); - spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", - (u_longlong_t)spa->spa_load_max_txg); +spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", + (u_longlong_t)spa->spa_load_max_txg); - return (spa_load(spa, state, SPA_IMPORT_EXISTING)); +return (spa_load(spa, state, SPA_IMPORT_EXISTING)); } /* - * If spa_load() fails this function will try loading prior txg's. If - * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool - * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this - * function will not rewind the pool and will return the same error as - * spa_load(). - */ +* If spa_load() fails this function will try loading prior txg's. If +* 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool +* will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this +* function will not rewind the pool and will return the same error as +* spa_load(). +*/ static int spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request, - int rewind_flags) -{ - nvlist_t *loadinfo = NULL; - nvlist_t *config = NULL; - int load_error, rewind_error; - uint64_t safe_rewind_txg; - uint64_t min_txg; - - if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { - spa->spa_load_max_txg = spa->spa_load_txg; - spa_set_log_state(spa, SPA_LOG_CLEAR); - } else { - spa->spa_load_max_txg = max_request; - if (max_request != UINT64_MAX) - spa->spa_extreme_rewind = B_TRUE; - } - - load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING); - if (load_error == 0) - return (0); - if (load_error == ZFS_ERR_NO_CHECKPOINT) { - /* - * When attempting checkpoint-rewind on a pool with no - * checkpoint, we should not attempt to load uberblocks - * from previous txgs when spa_load fails. 
- */ - ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); - spa_import_progress_remove(spa_guid(spa)); - return (load_error); - } - - if (spa->spa_root_vdev != NULL) - config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); +int rewind_flags) +{ +nvlist_t *loadinfo = NULL; +nvlist_t *config = NULL; +int load_error, rewind_error; +uint64_t safe_rewind_txg; +uint64_t min_txg; + +if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { + spa->spa_load_max_txg = spa->spa_load_txg; + spa_set_log_state(spa, SPA_LOG_CLEAR); +} else { + spa->spa_load_max_txg = max_request; + if (max_request != UINT64_MAX) + spa->spa_extreme_rewind = B_TRUE; +} - spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; - spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; +load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING); +if (load_error == 0) + return (0); +if (load_error == ZFS_ERR_NO_CHECKPOINT) { + /* + * When attempting checkpoint-rewind on a pool with no + * checkpoint, we should not attempt to load uberblocks + * from previous txgs when spa_load fails. + */ + ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); + spa_import_progress_remove(spa_guid(spa)); + return (load_error); +} - if (rewind_flags & ZPOOL_NEVER_REWIND) { - nvlist_free(config); - spa_import_progress_remove(spa_guid(spa)); - return (load_error); - } +if (spa->spa_root_vdev != NULL) + config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); - if (state == SPA_LOAD_RECOVER) { - /* Price of rolling back is discarding txgs, including log */ - spa_set_log_state(spa, SPA_LOG_CLEAR); - } else { - /* - * If we aren't rolling back save the load info from our first - * import attempt so that we can restore it after attempting - * to rewind. - */ - loadinfo = spa->spa_load_info; - spa->spa_load_info = fnvlist_alloc(); - } +spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; +spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; - spa->spa_load_max_txg = spa->spa_last_ubsync_txg; - safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; - min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? - TXG_INITIAL : safe_rewind_txg; +if (rewind_flags & ZPOOL_NEVER_REWIND) { + nvlist_free(config); + spa_import_progress_remove(spa_guid(spa)); + return (load_error); +} +if (state == SPA_LOAD_RECOVER) { + /* Price of rolling back is discarding txgs, including log */ + spa_set_log_state(spa, SPA_LOG_CLEAR); +} else { /* - * Continue as long as we're finding errors, we're still within - * the acceptable rewind range, and we're still finding uberblocks + * If we aren't rolling back save the load info from our first + * import attempt so that we can restore it after attempting + * to rewind. */ - while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && - spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { - if (spa->spa_load_max_txg < safe_rewind_txg) - spa->spa_extreme_rewind = B_TRUE; - rewind_error = spa_load_retry(spa, state); - } + loadinfo = spa->spa_load_info; + spa->spa_load_info = fnvlist_alloc(); +} - spa->spa_extreme_rewind = B_FALSE; - spa->spa_load_max_txg = UINT64_MAX; +spa->spa_load_max_txg = spa->spa_last_ubsync_txg; +safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; +min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? 
+ TXG_INITIAL : safe_rewind_txg; - if (config && (rewind_error || state != SPA_LOAD_RECOVER)) - spa_config_set(spa, config); - else - nvlist_free(config); +/* + * Continue as long as we're finding errors, we're still within + * the acceptable rewind range, and we're still finding uberblocks + */ +while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && + spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { + if (spa->spa_load_max_txg < safe_rewind_txg) + spa->spa_extreme_rewind = B_TRUE; + rewind_error = spa_load_retry(spa, state); +} - if (state == SPA_LOAD_RECOVER) { - ASSERT3P(loadinfo, ==, NULL); - spa_import_progress_remove(spa_guid(spa)); - return (rewind_error); - } else { - /* Store the rewind info as part of the initial load info */ - fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, - spa->spa_load_info); +spa->spa_extreme_rewind = B_FALSE; +spa->spa_load_max_txg = UINT64_MAX; - /* Restore the initial load info */ - fnvlist_free(spa->spa_load_info); - spa->spa_load_info = loadinfo; +if (config && (rewind_error || state != SPA_LOAD_RECOVER)) + spa_config_set(spa, config); +else + nvlist_free(config); - spa_import_progress_remove(spa_guid(spa)); - return (load_error); - } +if (state == SPA_LOAD_RECOVER) { + ASSERT3P(loadinfo, ==, NULL); + spa_import_progress_remove(spa_guid(spa)); + return (rewind_error); +} else { + /* Store the rewind info as part of the initial load info */ + fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, + spa->spa_load_info); + + /* Restore the initial load info */ + fnvlist_free(spa->spa_load_info); + spa->spa_load_info = loadinfo; + + spa_import_progress_remove(spa_guid(spa)); + return (load_error); +} } /* - * Pool Open/Import - * - * The import case is identical to an open except that the configuration is sent - * down from userland, instead of grabbed from the configuration cache. For the - * case of an open, the pool configuration will exist in the - * POOL_STATE_UNINITIALIZED state. - * - * The stats information (gen/count/ustats) is used to gather vdev statistics at - * the same time open the pool, without having to keep around the spa_t in some - * ambiguous state. - */ +* Pool Open/Import +* +* The import case is identical to an open except that the configuration is sent +* down from userland, instead of grabbed from the configuration cache. For the +* case of an open, the pool configuration will exist in the +* POOL_STATE_UNINITIALIZED state. +* +* The stats information (gen/count/ustats) is used to gather vdev statistics at +* the same time open the pool, without having to keep around the spa_t in some +* ambiguous state. +*/ static int spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, - nvlist_t **config) +nvlist_t **config) { - spa_t *spa; - spa_load_state_t state = SPA_LOAD_OPEN; - int error; - int locked = B_FALSE; - int firstopen = B_FALSE; +spa_t *spa; +spa_load_state_t state = SPA_LOAD_OPEN; +int error; +int locked = B_FALSE; +int firstopen = B_FALSE; - *spapp = NULL; +*spapp = NULL; - /* - * As disgusting as this is, we need to support recursive calls to this - * function because dsl_dir_open() is called during spa_load(), and ends - * up calling spa_open() again. The real fix is to figure out how to - * avoid dsl_dir_open() calling this in the first place. 
- */ - if (MUTEX_NOT_HELD(&spa_namespace_lock)) { - mutex_enter(&spa_namespace_lock); - locked = B_TRUE; - } +/* + * As disgusting as this is, we need to support recursive calls to this + * function because dsl_dir_open() is called during spa_load(), and ends + * up calling spa_open() again. The real fix is to figure out how to + * avoid dsl_dir_open() calling this in the first place. + */ +if (MUTEX_NOT_HELD(&spa_namespace_lock)) { + mutex_enter(&spa_namespace_lock); + locked = B_TRUE; +} - if ((spa = spa_lookup(pool)) == NULL) { - if (locked) - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOENT)); - } +if ((spa = spa_lookup(pool)) == NULL) { + if (locked) + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOENT)); +} - if (spa->spa_state == POOL_STATE_UNINITIALIZED) { - zpool_load_policy_t policy; +if (spa->spa_state == POOL_STATE_UNINITIALIZED) { + zpool_load_policy_t policy; - firstopen = B_TRUE; + firstopen = B_TRUE; - zpool_get_load_policy(nvpolicy ? nvpolicy : spa->spa_config, - &policy); - if (policy.zlp_rewind & ZPOOL_DO_REWIND) - state = SPA_LOAD_RECOVER; + zpool_get_load_policy(nvpolicy ? nvpolicy : spa->spa_config, + &policy); + if (policy.zlp_rewind & ZPOOL_DO_REWIND) + state = SPA_LOAD_RECOVER; - spa_activate(spa, spa_mode_global); + spa_activate(spa, spa_mode_global); - if (state != SPA_LOAD_RECOVER) - spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; - spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; + if (state != SPA_LOAD_RECOVER) + spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; + spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; - zfs_dbgmsg("spa_open_common: opening %s", pool); - error = spa_load_best(spa, state, policy.zlp_txg, - policy.zlp_rewind); + zfs_dbgmsg("spa_open_common: opening %s", pool); + error = spa_load_best(spa, state, policy.zlp_txg, + policy.zlp_rewind); - if (error == EBADF) { - /* - * If vdev_validate() returns failure (indicated by - * EBADF), it indicates that one of the vdevs indicates - * that the pool has been exported or destroyed. If - * this is the case, the config cache is out of sync and - * we should remove the pool from the namespace. - */ - spa_unload(spa); - spa_deactivate(spa); - spa_write_cachefile(spa, B_TRUE, B_TRUE); - spa_remove(spa); - if (locked) - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOENT)); - } + if (error == EBADF) { + /* + * If vdev_validate() returns failure (indicated by + * EBADF), it indicates that one of the vdevs indicates + * that the pool has been exported or destroyed. If + * this is the case, the config cache is out of sync and + * we should remove the pool from the namespace. + */ + spa_unload(spa); + spa_deactivate(spa); + spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_remove(spa); + if (locked) + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOENT)); + } - if (error) { - /* - * We can't open the pool, but we still have useful - * information: the state of each vdev after the - * attempted vdev_open(). Return this to the user. - */ - if (config != NULL && spa->spa_config) { - VERIFY(nvlist_dup(spa->spa_config, config, - KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist(*config, - ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); - } - spa_unload(spa); - spa_deactivate(spa); - spa->spa_last_open_failed = error; - if (locked) - mutex_exit(&spa_namespace_lock); - *spapp = NULL; - return (error); + if (error) { + /* + * We can't open the pool, but we still have useful + * information: the state of each vdev after the + * attempted vdev_open(). Return this to the user. 
+ */ + if (config != NULL && spa->spa_config) { + VERIFY(nvlist_dup(spa->spa_config, config, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist(*config, + ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); } + spa_unload(spa); + spa_deactivate(spa); + spa->spa_last_open_failed = error; + if (locked) + mutex_exit(&spa_namespace_lock); + *spapp = NULL; + return (error); } +} - spa_open_ref(spa, tag); +spa_open_ref(spa, tag); - if (config != NULL) - *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); +if (config != NULL) + *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); - /* - * If we've recovered the pool, pass back any information we - * gathered while doing the load. - */ - if (state == SPA_LOAD_RECOVER) { - VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); - } +/* + * If we've recovered the pool, pass back any information we + * gathered while doing the load. + */ +if (state == SPA_LOAD_RECOVER) { + VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); +} - if (locked) { - spa->spa_last_open_failed = 0; - spa->spa_last_ubsync_txg = 0; - spa->spa_load_txg = 0; - mutex_exit(&spa_namespace_lock); - } +if (locked) { + spa->spa_last_open_failed = 0; + spa->spa_last_ubsync_txg = 0; + spa->spa_load_txg = 0; + mutex_exit(&spa_namespace_lock); +} - if (firstopen) - zvol_create_minors(spa, spa_name(spa), B_TRUE); +if (firstopen) + zvol_create_minors(spa, spa_name(spa), B_TRUE); - *spapp = spa; +*spapp = spa; - return (0); +return (0); } int spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, - nvlist_t **config) +nvlist_t **config) { - return (spa_open_common(name, spapp, tag, policy, config)); +return (spa_open_common(name, spapp, tag, policy, config)); } int spa_open(const char *name, spa_t **spapp, void *tag) { - return (spa_open_common(name, spapp, tag, NULL, NULL)); +return (spa_open_common(name, spapp, tag, NULL, NULL)); } /* - * Lookup the given spa_t, incrementing the inject count in the process, - * preventing it from being exported or destroyed. - */ +* Lookup the given spa_t, incrementing the inject count in the process, +* preventing it from being exported or destroyed. +*/ spa_t * spa_inject_addref(char *name) { - spa_t *spa; +spa_t *spa; - mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(name)) == NULL) { - mutex_exit(&spa_namespace_lock); - return (NULL); - } - spa->spa_inject_ref++; +mutex_enter(&spa_namespace_lock); +if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); + return (NULL); +} +spa->spa_inject_ref++; +mutex_exit(&spa_namespace_lock); - return (spa); +return (spa); } void spa_inject_delref(spa_t *spa) { - mutex_enter(&spa_namespace_lock); - spa->spa_inject_ref--; - mutex_exit(&spa_namespace_lock); +mutex_enter(&spa_namespace_lock); +spa->spa_inject_ref--; +mutex_exit(&spa_namespace_lock); } /* - * Add spares device information to the nvlist. - */ +* Add spares device information to the nvlist. 
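
spa_inject_addref()/spa_inject_delref() above pin a pool against export while fault injection is active: a plain counter mutated only under spa_namespace_lock. A self-contained userland analogue of that pattern (a pthread mutex stands in for the namespace lock; the pool table is hypothetical):

#include <pthread.h>
#include <stddef.h>
#include <string.h>

typedef struct pool {
	const char *name;
	int inject_ref;	/* nonzero makes the export path bail with EBUSY */
} pool_t;

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;
static pool_t pools[] = { { "tank", 0 }, { "dozer", 0 } };

static pool_t *
pool_lookup_locked(const char *name)	/* caller holds ns_lock */
{
	for (size_t i = 0; i < sizeof (pools) / sizeof (pools[0]); i++)
		if (strcmp(pools[i].name, name) == 0)
			return (&pools[i]);
	return (NULL);
}

pool_t *
pool_inject_addref(const char *name)
{
	pool_t *p;

	pthread_mutex_lock(&ns_lock);
	if ((p = pool_lookup_locked(name)) != NULL)
		p->inject_ref++;
	pthread_mutex_unlock(&ns_lock);
	return (p);
}

void
pool_inject_delref(pool_t *p)
{
	pthread_mutex_lock(&ns_lock);
	p->inject_ref--;
	pthread_mutex_unlock(&ns_lock);
}
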
+*/ static void spa_add_spares(spa_t *spa, nvlist_t *config) { - nvlist_t **spares; - uint_t i, nspares; - nvlist_t *nvroot; - uint64_t guid; - vdev_stat_t *vs; - uint_t vsc; - uint64_t pool; - - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - - if (spa->spa_spares.sav_count == 0) - return; - - VERIFY(nvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, +nvlist_t **spares; +uint_t i, nspares; +nvlist_t *nvroot; +uint64_t guid; +vdev_stat_t *vs; +uint_t vsc; +uint64_t pool; + +ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + +if (spa->spa_spares.sav_count == 0) + return; + +VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); +VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); +if (nspares != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); - if (nspares != 0) { - VERIFY(nvlist_add_nvlist_array(nvroot, - ZPOOL_CONFIG_SPARES, spares, nspares) == 0); - VERIFY(nvlist_lookup_nvlist_array(nvroot, - ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); - /* - * Go through and find any spares which have since been - * repurposed as an active spare. If this is the case, update - * their status appropriately. - */ - for (i = 0; i < nspares; i++) { - VERIFY(nvlist_lookup_uint64(spares[i], - ZPOOL_CONFIG_GUID, &guid) == 0); - if (spa_spare_exists(guid, &pool, NULL) && - pool != 0ULL) { - VERIFY(nvlist_lookup_uint64_array( - spares[i], ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0); - vs->vs_state = VDEV_STATE_CANT_OPEN; - vs->vs_aux = VDEV_AUX_SPARED; - } + /* + * Go through and find any spares which have since been + * repurposed as an active spare. If this is the case, update + * their status appropriately. + */ + for (i = 0; i < nspares; i++) { + VERIFY(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + if (spa_spare_exists(guid, &pool, NULL) && + pool != 0ULL) { + VERIFY(nvlist_lookup_uint64_array( + spares[i], ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + vs->vs_state = VDEV_STATE_CANT_OPEN; + vs->vs_aux = VDEV_AUX_SPARED; } } } +} /* - * Add l2cache device information to the nvlist, including vdev stats. - */ +* Add l2cache device information to the nvlist, including vdev stats. 
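
A subtlety worth flagging in spa_add_spares() above: nvlist_add_nvlist_array() stores copies, so the code immediately looks the array back up and mutates the stored copies, not the caller's originals. A small userland demonstration of that round trip, assuming the libnvpair shipped with ZFS (build with -lnvpair):

#include <libnvpair.h>
#include <stdio.h>

int
main(void)
{
	nvlist_t *root, *spare, **list;
	uint_t n;
	uint64_t guid;

	nvlist_alloc(&spare, NV_UNIQUE_NAME, 0);
	nvlist_add_uint64(spare, "guid", 0xdeadbeefULL);

	nvlist_alloc(&root, NV_UNIQUE_NAME, 0);
	/* add_nvlist_array copies its elements into 'root' */
	nvlist_add_nvlist_array(root, "spares", &spare, 1);

	/* re-lookup, then walk the stored copies as the kernel loop does */
	nvlist_lookup_nvlist_array(root, "spares", &list, &n);
	for (uint_t i = 0; i < n; i++) {
		nvlist_lookup_uint64(list[i], "guid", &guid);
		printf("spare %u guid %llx\n", i, (unsigned long long)guid);
	}

	nvlist_free(spare);
	nvlist_free(root);
	return (0);
}
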
+*/ static void spa_add_l2cache(spa_t *spa, nvlist_t *config) { - nvlist_t **l2cache; - uint_t i, j, nl2cache; - nvlist_t *nvroot; - uint64_t guid; - vdev_t *vd; - vdev_stat_t *vs; - uint_t vsc; - - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); +nvlist_t **l2cache; +uint_t i, j, nl2cache; +nvlist_t *nvroot; +uint64_t guid; +vdev_t *vd; +vdev_stat_t *vs; +uint_t vsc; + +ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + +if (spa->spa_l2cache.sav_count == 0) + return; + +VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); +VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); +if (nl2cache != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, + ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - if (spa->spa_l2cache.sav_count == 0) - return; - - VERIFY(nvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - if (nl2cache != 0) { - VERIFY(nvlist_add_nvlist_array(nvroot, - ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); - VERIFY(nvlist_lookup_nvlist_array(nvroot, - ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - - /* - * Update level 2 cache device stats. - */ - - for (i = 0; i < nl2cache; i++) { - VERIFY(nvlist_lookup_uint64(l2cache[i], - ZPOOL_CONFIG_GUID, &guid) == 0); + /* + * Update level 2 cache device stats. + */ - vd = NULL; - for (j = 0; j < spa->spa_l2cache.sav_count; j++) { - if (guid == - spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { - vd = spa->spa_l2cache.sav_vdevs[j]; - break; - } + for (i = 0; i < nl2cache; i++) { + VERIFY(nvlist_lookup_uint64(l2cache[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + + vd = NULL; + for (j = 0; j < spa->spa_l2cache.sav_count; j++) { + if (guid == + spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { + vd = spa->spa_l2cache.sav_vdevs[j]; + break; } - ASSERT(vd != NULL); + } + ASSERT(vd != NULL); - VERIFY(nvlist_lookup_uint64_array(l2cache[i], - ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) - == 0); - vdev_get_stats(vd, vs); - vdev_config_generate_stats(vd, l2cache[i]); + VERIFY(nvlist_lookup_uint64_array(l2cache[i], + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) + == 0); + vdev_get_stats(vd, vs); + vdev_config_generate_stats(vd, l2cache[i]); - } } } +} static void spa_feature_stats_from_disk(spa_t *spa, nvlist_t *features) { - zap_cursor_t zc; - zap_attribute_t za; +zap_cursor_t zc; +zap_attribute_t za; - if (spa->spa_feat_for_read_obj != 0) { - for (zap_cursor_init(&zc, spa->spa_meta_objset, - spa->spa_feat_for_read_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY0(nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); - } - zap_cursor_fini(&zc); +if (spa->spa_feat_for_read_obj != 0) { + for (zap_cursor_init(&zc, spa->spa_meta_objset, + spa->spa_feat_for_read_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + ASSERT(za.za_integer_length == sizeof (uint64_t) && + za.za_num_integers == 1); + VERIFY0(nvlist_add_uint64(features, za.za_name, + za.za_first_integer)); } + zap_cursor_fini(&zc); +} - if (spa->spa_feat_for_write_obj != 0) { - for (zap_cursor_init(&zc, spa->spa_meta_objset, - spa->spa_feat_for_write_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - 
ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY0(nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); - } - zap_cursor_fini(&zc); +if (spa->spa_feat_for_write_obj != 0) { + for (zap_cursor_init(&zc, spa->spa_meta_objset, + spa->spa_feat_for_write_obj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + ASSERT(za.za_integer_length == sizeof (uint64_t) && + za.za_num_integers == 1); + VERIFY0(nvlist_add_uint64(features, za.za_name, + za.za_first_integer)); } + zap_cursor_fini(&zc); +} } static void spa_feature_stats_from_cache(spa_t *spa, nvlist_t *features) { - int i; +int i; - for (i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t feature = spa_feature_table[i]; - uint64_t refcount; +for (i = 0; i < SPA_FEATURES; i++) { + zfeature_info_t feature = spa_feature_table[i]; + uint64_t refcount; - if (feature_get_refcount(spa, &feature, &refcount) != 0) - continue; + if (feature_get_refcount(spa, &feature, &refcount) != 0) + continue; - VERIFY0(nvlist_add_uint64(features, feature.fi_guid, refcount)); - } + VERIFY0(nvlist_add_uint64(features, feature.fi_guid, refcount)); +} } /* - * Store a list of pool features and their reference counts in the - * config. - * - * The first time this is called on a spa, allocate a new nvlist, fetch - * the pool features and reference counts from disk, then save the list - * in the spa. In subsequent calls on the same spa use the saved nvlist - * and refresh its values from the cached reference counts. This - * ensures we don't block here on I/O on a suspended pool so 'zpool - * clear' can resume the pool. - */ +* Store a list of pool features and their reference counts in the +* config. +* +* The first time this is called on a spa, allocate a new nvlist, fetch +* the pool features and reference counts from disk, then save the list +* in the spa. In subsequent calls on the same spa use the saved nvlist +* and refresh its values from the cached reference counts. This +* ensures we don't block here on I/O on a suspended pool so 'zpool +* clear' can resume the pool. 
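
The comment here states the design constraint plainly: reading feature refcounts from disk can block on a suspended pool, which would deadlock 'zpool clear'. So the first call materializes the nvlist from the on-disk ZAPs and every later call only refreshes it from in-core counts. A structural sketch of that lazy-cache shape (features_from_disk()/features_refresh() are hypothetical stand-ins):

#include <stddef.h>
#include <stdint.h>

typedef struct {
	const char *guid;
	uint64_t refcount;
} feat_stat_t;

typedef struct {
	feat_stat_t *stats;	/* lazily built, like spa->spa_feat_stats */
	size_t n;
} pool_t;

extern size_t features_from_disk(feat_stat_t **out);	/* slow: disk I/O */
extern void features_refresh(feat_stat_t *s, size_t n);	/* fast: in-core */

void
pool_feature_stats(pool_t *p)
{
	if (p->stats != NULL) {
		features_refresh(p->stats, p->n);	/* never touches disk */
	} else {
		p->n = features_from_disk(&p->stats);	/* first call only */
	}
}
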
+*/ static void spa_add_feature_stats(spa_t *spa, nvlist_t *config) { - nvlist_t *features; +nvlist_t *features; - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); +ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - mutex_enter(&spa->spa_feat_stats_lock); - features = spa->spa_feat_stats; +mutex_enter(&spa->spa_feat_stats_lock); +features = spa->spa_feat_stats; - if (features != NULL) { - spa_feature_stats_from_cache(spa, features); - } else { - VERIFY0(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP)); - spa->spa_feat_stats = features; - spa_feature_stats_from_disk(spa, features); - } +if (features != NULL) { + spa_feature_stats_from_cache(spa, features); +} else { + VERIFY0(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP)); + spa->spa_feat_stats = features; + spa_feature_stats_from_disk(spa, features); +} - VERIFY0(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, - features)); +VERIFY0(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, + features)); - mutex_exit(&spa->spa_feat_stats_lock); +mutex_exit(&spa->spa_feat_stats_lock); } int spa_get_stats(const char *name, nvlist_t **config, - char *altroot, size_t buflen) +char *altroot, size_t buflen) { - int error; - spa_t *spa; +int error; +spa_t *spa; - *config = NULL; - error = spa_open_common(name, &spa, FTAG, NULL, config); +*config = NULL; +error = spa_open_common(name, &spa, FTAG, NULL, config); - if (spa != NULL) { - /* - * This still leaves a window of inconsistency where the spares - * or l2cache devices could change and the config would be - * self-inconsistent. - */ - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); +if (spa != NULL) { + /* + * This still leaves a window of inconsistency where the spares + * or l2cache devices could change and the config would be + * self-inconsistent. + */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - if (*config != NULL) { - uint64_t loadtimes[2]; + if (*config != NULL) { + uint64_t loadtimes[2]; - loadtimes[0] = spa->spa_loaded_ts.tv_sec; - loadtimes[1] = spa->spa_loaded_ts.tv_nsec; - VERIFY(nvlist_add_uint64_array(*config, - ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); + loadtimes[0] = spa->spa_loaded_ts.tv_sec; + loadtimes[1] = spa->spa_loaded_ts.tv_nsec; + VERIFY(nvlist_add_uint64_array(*config, + ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); - VERIFY(nvlist_add_uint64(*config, - ZPOOL_CONFIG_ERRCOUNT, - spa_get_errlog_size(spa)) == 0); - - if (spa_suspended(spa)) { - VERIFY(nvlist_add_uint64(*config, - ZPOOL_CONFIG_SUSPENDED, - spa->spa_failmode) == 0); - VERIFY(nvlist_add_uint64(*config, - ZPOOL_CONFIG_SUSPENDED_REASON, - spa->spa_suspended) == 0); - } + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_ERRCOUNT, + spa_get_errlog_size(spa)) == 0); - spa_add_spares(spa, *config); - spa_add_l2cache(spa, *config); - spa_add_feature_stats(spa, *config); + if (spa_suspended(spa)) { + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED, + spa->spa_failmode) == 0); + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED_REASON, + spa->spa_suspended) == 0); } + + spa_add_spares(spa, *config); + spa_add_l2cache(spa, *config); + spa_add_feature_stats(spa, *config); } +} - /* - * We want to get the alternate root even for faulted pools, so we cheat - * and call spa_lookup() directly. 
- */ - if (altroot) { - if (spa == NULL) { - mutex_enter(&spa_namespace_lock); - spa = spa_lookup(name); - if (spa) - spa_altroot(spa, altroot, buflen); - else - altroot[0] = '\0'; - spa = NULL; - mutex_exit(&spa_namespace_lock); - } else { +/* + * We want to get the alternate root even for faulted pools, so we cheat + * and call spa_lookup() directly. + */ +if (altroot) { + if (spa == NULL) { + mutex_enter(&spa_namespace_lock); + spa = spa_lookup(name); + if (spa) spa_altroot(spa, altroot, buflen); - } + else + altroot[0] = '\0'; + spa = NULL; + mutex_exit(&spa_namespace_lock); + } else { + spa_altroot(spa, altroot, buflen); } +} - if (spa != NULL) { - spa_config_exit(spa, SCL_CONFIG, FTAG); - spa_close(spa, FTAG); - } +if (spa != NULL) { + spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_close(spa, FTAG); +} - return (error); +return (error); } /* - * Validate that the auxiliary device array is well formed. We must have an - * array of nvlists, each which describes a valid leaf vdev. If this is an - * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be - * specified, as long as they are well-formed. - */ +* Validate that the auxiliary device array is well formed. We must have an +* array of nvlists, each which describes a valid leaf vdev. If this is an +* import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be +* specified, as long as they are well-formed. +*/ static int spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, - spa_aux_vdev_t *sav, const char *config, uint64_t version, - vdev_labeltype_t label) +spa_aux_vdev_t *sav, const char *config, uint64_t version, +vdev_labeltype_t label) { - nvlist_t **dev; - uint_t i, ndev; - vdev_t *vd; - int error; +nvlist_t **dev; +uint_t i, ndev; +vdev_t *vd; +int error; - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - /* - * It's acceptable to have no devs specified. - */ - if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) - return (0); +/* + * It's acceptable to have no devs specified. + */ +if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) + return (0); - if (ndev == 0) - return (SET_ERROR(EINVAL)); +if (ndev == 0) + return (SET_ERROR(EINVAL)); - /* - * Make sure the pool is formatted with a version that supports this - * device type. - */ - if (spa_version(spa) < version) - return (SET_ERROR(ENOTSUP)); +/* + * Make sure the pool is formatted with a version that supports this + * device type. + */ +if (spa_version(spa) < version) + return (SET_ERROR(ENOTSUP)); - /* - * Set the pending device list so we correctly handle device in-use - * checking. - */ - sav->sav_pending = dev; - sav->sav_npending = ndev; +/* + * Set the pending device list so we correctly handle device in-use + * checking. 
+ */ +sav->sav_pending = dev; +sav->sav_npending = ndev; - for (i = 0; i < ndev; i++) { - if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, - mode)) != 0) - goto out; +for (i = 0; i < ndev; i++) { + if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, + mode)) != 0) + goto out; - if (!vd->vdev_ops->vdev_op_leaf) { - vdev_free(vd); - error = SET_ERROR(EINVAL); - goto out; - } + if (!vd->vdev_ops->vdev_op_leaf) { + vdev_free(vd); + error = SET_ERROR(EINVAL); + goto out; + } - vd->vdev_top = vd; + vd->vdev_top = vd; - if ((error = vdev_open(vd)) == 0 && - (error = vdev_label_init(vd, crtxg, label)) == 0) { - VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - } + if ((error = vdev_open(vd)) == 0 && + (error = vdev_label_init(vd, crtxg, label)) == 0) { + VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + } - vdev_free(vd); + vdev_free(vd); - if (error && - (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) - goto out; - else - error = 0; - } + if (error && + (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) + goto out; + else + error = 0; +} out: - sav->sav_pending = NULL; - sav->sav_npending = 0; - return (error); +sav->sav_pending = NULL; +sav->sav_npending = 0; +return (error); } static int spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) { - int error; +int error; - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); +ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, - &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, - VDEV_LABEL_SPARE)) != 0) { - return (error); - } +if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, + &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, + VDEV_LABEL_SPARE)) != 0) { + return (error); +} - return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, - &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, - VDEV_LABEL_L2CACHE)); +return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, + &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, + VDEV_LABEL_L2CACHE)); } static void spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, - const char *config) +const char *config) { - int i; +int i; - if (sav->sav_config != NULL) { - nvlist_t **olddevs; - uint_t oldndevs; - nvlist_t **newdevs; +if (sav->sav_config != NULL) { + nvlist_t **olddevs; + uint_t oldndevs; + nvlist_t **newdevs; - /* - * Generate new dev list by concatenating with the - * current dev list. - */ - VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, - &olddevs, &oldndevs) == 0); + /* + * Generate new dev list by concatenating with the + * current dev list. 
+ */ + VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, + &olddevs, &oldndevs) == 0); - newdevs = kmem_alloc(sizeof (void *) * - (ndevs + oldndevs), KM_SLEEP); - for (i = 0; i < oldndevs; i++) - VERIFY(nvlist_dup(olddevs[i], &newdevs[i], - KM_SLEEP) == 0); - for (i = 0; i < ndevs; i++) - VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], - KM_SLEEP) == 0); + newdevs = kmem_alloc(sizeof (void *) * + (ndevs + oldndevs), KM_SLEEP); + for (i = 0; i < oldndevs; i++) + VERIFY(nvlist_dup(olddevs[i], &newdevs[i], + KM_SLEEP) == 0); + for (i = 0; i < ndevs; i++) + VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], + KM_SLEEP) == 0); - VERIFY(nvlist_remove(sav->sav_config, config, - DATA_TYPE_NVLIST_ARRAY) == 0); + VERIFY(nvlist_remove(sav->sav_config, config, + DATA_TYPE_NVLIST_ARRAY) == 0); - VERIFY(nvlist_add_nvlist_array(sav->sav_config, - config, newdevs, ndevs + oldndevs) == 0); - for (i = 0; i < oldndevs + ndevs; i++) - nvlist_free(newdevs[i]); - kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); - } else { - /* - * Generate a new dev list. - */ - VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, - KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, - devs, ndevs) == 0); - } + VERIFY(nvlist_add_nvlist_array(sav->sav_config, + config, newdevs, ndevs + oldndevs) == 0); + for (i = 0; i < oldndevs + ndevs; i++) + nvlist_free(newdevs[i]); + kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); +} else { + /* + * Generate a new dev list. + */ + VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, + devs, ndevs) == 0); +} } /* - * Stop and drop level 2 ARC devices - */ +* Stop and drop level 2 ARC devices +*/ void spa_l2cache_drop(spa_t *spa) { - vdev_t *vd; - int i; - spa_aux_vdev_t *sav = &spa->spa_l2cache; +vdev_t *vd; +int i; +spa_aux_vdev_t *sav = &spa->spa_l2cache; - for (i = 0; i < sav->sav_count; i++) { - uint64_t pool; +for (i = 0; i < sav->sav_count; i++) { + uint64_t pool; - vd = sav->sav_vdevs[i]; - ASSERT(vd != NULL); + vd = sav->sav_vdevs[i]; + ASSERT(vd != NULL); - if (spa_l2cache_exists(vd->vdev_guid, &pool) && - pool != 0ULL && l2arc_vdev_present(vd)) - l2arc_remove_vdev(vd); - } + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); +} } /* - * Verify encryption parameters for spa creation. If we are encrypting, we must - * have the encryption feature flag enabled. - */ +* Verify encryption parameters for spa creation. If we are encrypting, we must +* have the encryption feature flag enabled. 
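
spa_set_aux_vdevs() above merges the existing aux device array with the caller's by duplicating every element into one new array and swapping it in. Stripped of the nvlist bookkeeping, the merge itself is just this (a sketch, not the kernel code):

#include <stdlib.h>
#include <string.h>

/* Old entries first, new entries appended, as in spa_set_aux_vdevs(). */
static void **
concat_ptrs(void *const *olds, size_t nold, void *const *news, size_t nnew)
{
	void **merged = malloc((nold + nnew) * sizeof (void *));

	if (merged == NULL)
		return (NULL);
	if (nold > 0)
		memcpy(merged, olds, nold * sizeof (void *));
	if (nnew > 0)
		memcpy(merged + nold, news, nnew * sizeof (void *));
	return (merged);
}

The kernel version must additionally nvlist_dup() each element, because nvlist_add_nvlist_array() copies and the temporary duplicates are freed immediately afterwards.
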
+*/ static int spa_create_check_encryption_params(dsl_crypto_params_t *dcp, - boolean_t has_encryption) +boolean_t has_encryption) { - if (dcp->cp_crypt != ZIO_CRYPT_OFF && - dcp->cp_crypt != ZIO_CRYPT_INHERIT && - !has_encryption) - return (SET_ERROR(ENOTSUP)); +if (dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT && + !has_encryption) + return (SET_ERROR(ENOTSUP)); - return (dmu_objset_create_crypt_check(NULL, dcp, NULL)); +return (dmu_objset_create_crypt_check(NULL, dcp, NULL)); } /* - * Pool Creation - */ +* Pool Creation +*/ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - nvlist_t *zplprops, dsl_crypto_params_t *dcp) -{ - spa_t *spa; - char *altroot = NULL; - vdev_t *rvd; - dsl_pool_t *dp; - dmu_tx_t *tx; - int error = 0; - uint64_t txg = TXG_INITIAL; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - uint64_t version, obj; - boolean_t has_features; - boolean_t has_encryption; - spa_feature_t feat; - char *feat_name; - char *poolname; - nvlist_t *nvl; - - if (props == NULL || - nvlist_lookup_string(props, "tname", &poolname) != 0) - poolname = (char *)pool; +nvlist_t *zplprops, dsl_crypto_params_t *dcp) +{ +spa_t *spa; +char *altroot = NULL; +vdev_t *rvd; +dsl_pool_t *dp; +dmu_tx_t *tx; +int error = 0; +uint64_t txg = TXG_INITIAL; +nvlist_t **spares, **l2cache; +uint_t nspares, nl2cache; +uint64_t version, obj; +boolean_t has_features; +boolean_t has_encryption; +boolean_t has_allocclass; +spa_feature_t feat; +char *feat_name; +char *poolname; +nvlist_t *nvl; + +if (props == NULL || + nvlist_lookup_string(props, "tname", &poolname) != 0) + poolname = (char *)pool; - /* - * If this pool already exists, return failure. - */ - mutex_enter(&spa_namespace_lock); - if (spa_lookup(poolname) != NULL) { - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EEXIST)); - } +/* + * If this pool already exists, return failure. + */ +mutex_enter(&spa_namespace_lock); +if (spa_lookup(poolname) != NULL) { + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EEXIST)); +} - /* - * Allocate a new spa_t structure. - */ - nvl = fnvlist_alloc(); - fnvlist_add_string(nvl, ZPOOL_CONFIG_POOL_NAME, pool); - (void) nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); - spa = spa_add(poolname, nvl, altroot); - fnvlist_free(nvl); - spa_activate(spa, spa_mode_global); +/* + * Allocate a new spa_t structure. + */ +nvl = fnvlist_alloc(); +fnvlist_add_string(nvl, ZPOOL_CONFIG_POOL_NAME, pool); +(void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); +spa = spa_add(poolname, nvl, altroot); +fnvlist_free(nvl); +spa_activate(spa, spa_mode_global); + +if (props && (error = spa_prop_validate(spa, props))) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); +} + +/* + * Temporary pool names should never be written to disk. 
+ */ +if (poolname != pool) + spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; - if (props && (error = spa_prop_validate(spa, props))) { +has_features = B_FALSE; +has_encryption = B_FALSE; +has_allocclass = B_FALSE; +for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); + elem != NULL; elem = nvlist_next_nvpair(props, elem)) { + if (zpool_prop_feature(nvpair_name(elem))) { + has_features = B_TRUE; + + feat_name = strchr(nvpair_name(elem), '@') + 1; + VERIFY0(zfeature_lookup_name(feat_name, &feat)); + if (feat == SPA_FEATURE_ENCRYPTION) + has_encryption = B_TRUE; + if (feat == SPA_FEATURE_ALLOCATION_CLASSES) + has_allocclass = B_TRUE; + } +} + +/* verify encryption params, if they were provided */ +if (dcp != NULL) { + error = spa_create_check_encryption_params(dcp, has_encryption); + if (error != 0) { spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } +} +if (!has_allocclass && zfs_special_devs(nvroot)) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (ENOTSUP); +} - /* - * Temporary pool names should never be written to disk. - */ - if (poolname != pool) - spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; - - has_features = B_FALSE; - has_encryption = B_FALSE; - for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); - elem != NULL; elem = nvlist_next_nvpair(props, elem)) { - if (zpool_prop_feature(nvpair_name(elem))) { - has_features = B_TRUE; - - feat_name = strchr(nvpair_name(elem), '@') + 1; - VERIFY0(zfeature_lookup_name(feat_name, &feat)); - if (feat == SPA_FEATURE_ENCRYPTION) - has_encryption = B_TRUE; - } - } +if (has_features || nvlist_lookup_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { + version = SPA_VERSION; +} +ASSERT(SPA_VERSION_IS_SUPPORTED(version)); - /* verify encryption params, if they were provided */ - if (dcp != NULL) { - error = spa_create_check_encryption_params(dcp, has_encryption); - if (error != 0) { - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } - } +spa->spa_first_txg = txg; +spa->spa_uberblock.ub_txg = txg - 1; +spa->spa_uberblock.ub_version = version; +spa->spa_ubsync = spa->spa_uberblock; +spa->spa_load_state = SPA_LOAD_CREATE; +spa->spa_removing_phys.sr_state = DSS_NONE; +spa->spa_removing_phys.sr_removing_vdev = -1; +spa->spa_removing_phys.sr_prev_indirect_vdev = -1; +spa->spa_indirect_vdevs_loaded = B_TRUE; - if (has_features || nvlist_lookup_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { - version = SPA_VERSION; - } - ASSERT(SPA_VERSION_IS_SUPPORTED(version)); +/* + * Create "The Godfather" zio to hold all async IOs + */ +spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); +for (int i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); +} - spa->spa_first_txg = txg; - spa->spa_uberblock.ub_txg = txg - 1; - spa->spa_uberblock.ub_version = version; - spa->spa_ubsync = spa->spa_uberblock; - spa->spa_load_state = SPA_LOAD_CREATE; - spa->spa_removing_phys.sr_state = DSS_NONE; - spa->spa_removing_phys.sr_removing_vdev = -1; - spa->spa_removing_phys.sr_prev_indirect_vdev = -1; - spa->spa_indirect_vdevs_loaded = B_TRUE; +/* + * Create the root vdev. 
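
New in this hunk is the has_allocclass scan: pool feature properties arrive as "feature@<name>", and spa_create() keys both the encryption check and the new allocation-classes check off the text after the '@'. A runnable sketch of that extraction:

#include <stdio.h>
#include <string.h>

static const char *
feature_short_name(const char *propname)
{
	const char *at = strchr(propname, '@');

	return (at == NULL ? NULL : at + 1);	/* skip "feature@" prefix */
}

int
main(void)
{
	printf("%s\n", feature_short_name("feature@allocation_classes"));
	return (0);
}
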
+ */ +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - /* - * Create "The Godfather" zio to hold all async IOs - */ - spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), - KM_SLEEP); - for (int i = 0; i < max_ncpus; i++) { - spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); - } +error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); - /* - * Create the root vdev. - */ - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +ASSERT(error != 0 || rvd != NULL); +ASSERT(error != 0 || spa->spa_root_vdev == rvd); - error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); +if (error == 0 && !zfs_allocatable_devs(nvroot)) + error = SET_ERROR(EINVAL); - ASSERT(error != 0 || rvd != NULL); - ASSERT(error != 0 || spa->spa_root_vdev == rvd); +if (error == 0 && + (error = vdev_create(rvd, txg, B_FALSE)) == 0 && + (error = spa_validate_aux(spa, nvroot, txg, + VDEV_ALLOC_ADD)) == 0) { + /* + * instantiate the metaslab groups (this will dirty the vdevs) + * we can no longer error exit past this point + */ + for (int c = 0; error == 0 && c < rvd->vdev_children; c++) { + vdev_t *vd = rvd->vdev_child[c]; - if (error == 0 && !zfs_allocatable_devs(nvroot)) - error = SET_ERROR(EINVAL); + vdev_metaslab_set_size(vd); + vdev_expand(vd, txg); + } +} - if (error == 0 && - (error = vdev_create(rvd, txg, B_FALSE)) == 0 && - (error = spa_validate_aux(spa, nvroot, txg, - VDEV_ALLOC_ADD)) == 0) { - /* - * instantiate the metaslab groups (this will dirty the vdevs) - * we can no longer error exit past this point - */ - for (int c = 0; error == 0 && c < rvd->vdev_children; c++) { - vdev_t *vd = rvd->vdev_child[c]; +spa_config_exit(spa, SCL_ALL, FTAG); - vdev_metaslab_set_size(vd); - vdev_expand(vd, txg); - } - } +if (error != 0) { + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); +} +/* + * Get the list of spares, if specified. + */ +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_spares.sav_sync = B_TRUE; +} - if (error != 0) { - spa_unload(spa); - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } - - /* - * Get the list of spares, if specified. - */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, - KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, spares, nspares) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_spares(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_spares.sav_sync = B_TRUE; - } +/* + * Get the list of level 2 cache devices, if specified. 
+ */ +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_l2cache.sav_sync = B_TRUE; +} - /* - * Get the list of level 2 cache devices, if specified. - */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) { - VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_l2cache(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_l2cache.sav_sync = B_TRUE; - } +spa->spa_is_initializing = B_TRUE; +spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); +spa->spa_is_initializing = B_FALSE; - spa->spa_is_initializing = B_TRUE; - spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); - spa->spa_is_initializing = B_FALSE; +/* + * Create DDTs (dedup tables). + */ +ddt_create(spa); - /* - * Create DDTs (dedup tables). - */ - ddt_create(spa); +spa_update_dspace(spa); - spa_update_dspace(spa); +tx = dmu_tx_create_assigned(dp, txg); - tx = dmu_tx_create_assigned(dp, txg); +/* + * Create the pool's history object. + */ +if (version >= SPA_VERSION_ZPOOL_HISTORY && !spa->spa_history) + spa_history_create_obj(spa, tx); - /* - * Create the pool's history object. - */ - if (version >= SPA_VERSION_ZPOOL_HISTORY && !spa->spa_history) - spa_history_create_obj(spa, tx); +spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); +spa_history_log_version(spa, "create", tx); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); - spa_history_log_version(spa, "create", tx); +/* + * Create the pool config object. + */ +spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, + DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, + DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); - /* - * Create the pool config object. - */ - spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, - DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, - DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); +if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, + sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { + cmn_err(CE_PANIC, "failed to add pool config"); +} - if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, - sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { - cmn_err(CE_PANIC, "failed to add pool config"); - } +if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, + sizeof (uint64_t), 1, &version, tx) != 0) { + cmn_err(CE_PANIC, "failed to add pool version"); +} +/* Newly created pools with the right version are always deflated. */ +if (version >= SPA_VERSION_RAIDZ_DEFLATE) { + spa->spa_deflate = TRUE; if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, - sizeof (uint64_t), 1, &version, tx) != 0) { - cmn_err(CE_PANIC, "failed to add pool version"); - } - - /* Newly created pools with the right version are always deflated. 
*/ - if (version >= SPA_VERSION_RAIDZ_DEFLATE) { - spa->spa_deflate = TRUE; - if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, - sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { - cmn_err(CE_PANIC, "failed to add deflate"); - } + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { + cmn_err(CE_PANIC, "failed to add deflate"); } +} - /* - * Create the deferred-free bpobj. Turn off compression - * because sync-to-convergence takes longer if the blocksize - * keeps changing. - */ - obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); - dmu_object_set_compress(spa->spa_meta_objset, obj, - ZIO_COMPRESS_OFF, tx); - if (zap_add(spa->spa_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, - sizeof (uint64_t), 1, &obj, tx) != 0) { - cmn_err(CE_PANIC, "failed to add bpobj"); - } - VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, - spa->spa_meta_objset, obj)); +/* + * Create the deferred-free bpobj. Turn off compression + * because sync-to-convergence takes longer if the blocksize + * keeps changing. + */ +obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); +dmu_object_set_compress(spa->spa_meta_objset, obj, + ZIO_COMPRESS_OFF, tx); +if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, + sizeof (uint64_t), 1, &obj, tx) != 0) { + cmn_err(CE_PANIC, "failed to add bpobj"); +} +VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, + spa->spa_meta_objset, obj)); - /* - * Generate some random noise for salted checksums to operate on. - */ - (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, - sizeof (spa->spa_cksum_salt.zcs_bytes)); +/* + * Generate some random noise for salted checksums to operate on. + */ +(void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, + sizeof (spa->spa_cksum_salt.zcs_bytes)); - /* - * Set pool properties. - */ - spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); - spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); - spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); - spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); - spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); - spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); +/* + * Set pool properties. + */ +spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); +spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); +spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); +spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); +spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); +spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); - if (props != NULL) { - spa_configfile_set(spa, props, B_FALSE); - spa_sync_props(props, tx); - } +if (props != NULL) { + spa_configfile_set(spa, props, B_FALSE); + spa_sync_props(props, tx); +} - dmu_tx_commit(tx); +dmu_tx_commit(tx); - spa->spa_sync_on = B_TRUE; - txg_sync_start(dp); - mmp_thread_start(spa); - txg_wait_synced(dp, txg); +spa->spa_sync_on = B_TRUE; +txg_sync_start(dp); +mmp_thread_start(spa); +txg_wait_synced(dp, txg); - spa_spawn_aux_threads(spa); +spa_spawn_aux_threads(spa); - spa_write_cachefile(spa, B_FALSE, B_TRUE); +spa_write_cachefile(spa, B_FALSE, B_TRUE); - /* - * Don't count references from objsets that are already closed - * and are making their way through the eviction process. 
- */ - spa_evicting_os_wait(spa); - spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); - spa->spa_load_state = SPA_LOAD_NONE; +/* + * Don't count references from objsets that are already closed + * and are making their way through the eviction process. + */ +spa_evicting_os_wait(spa); +spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); +spa->spa_load_state = SPA_LOAD_NONE; - mutex_exit(&spa_namespace_lock); +mutex_exit(&spa_namespace_lock); - return (0); +return (0); } /* - * Import a non-root pool into the system. - */ +* Import a non-root pool into the system. +*/ int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) { - spa_t *spa; - char *altroot = NULL; - spa_load_state_t state = SPA_LOAD_IMPORT; - zpool_load_policy_t policy; - uint64_t mode = spa_mode_global; - uint64_t readonly = B_FALSE; - int error; - nvlist_t *nvroot; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - - /* - * If a pool with this name exists, return failure. - */ - mutex_enter(&spa_namespace_lock); - if (spa_lookup(pool) != NULL) { - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EEXIST)); - } +spa_t *spa; +char *altroot = NULL; +spa_load_state_t state = SPA_LOAD_IMPORT; +zpool_load_policy_t policy; +uint64_t mode = spa_mode_global; +uint64_t readonly = B_FALSE; +int error; +nvlist_t *nvroot; +nvlist_t **spares, **l2cache; +uint_t nspares, nl2cache; - /* - * Create and initialize the spa structure. - */ - (void) nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); - (void) nvlist_lookup_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); - if (readonly) - mode = FREAD; - spa = spa_add(pool, config, altroot); - spa->spa_import_flags = flags; +/* + * If a pool with this name exists, return failure. + */ +mutex_enter(&spa_namespace_lock); +if (spa_lookup(pool) != NULL) { + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EEXIST)); +} - /* - * Verbatim import - Take a pool and insert it into the namespace - * as if it had been loaded at boot. - */ - if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { - if (props != NULL) - spa_configfile_set(spa, props, B_FALSE); +/* + * Create and initialize the spa structure. + */ +(void) nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); +(void) nvlist_lookup_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); +if (readonly) + mode = FREAD; +spa = spa_add(pool, config, altroot); +spa->spa_import_flags = flags; - spa_write_cachefile(spa, B_FALSE, B_TRUE); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); - zfs_dbgmsg("spa_import: verbatim import of %s", pool); - mutex_exit(&spa_namespace_lock); - return (0); - } +/* + * Verbatim import - Take a pool and insert it into the namespace + * as if it had been loaded at boot. + */ +if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { + if (props != NULL) + spa_configfile_set(spa, props, B_FALSE); - spa_activate(spa, mode); + spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); + zfs_dbgmsg("spa_import: verbatim import of %s", pool); + mutex_exit(&spa_namespace_lock); + return (0); +} - /* - * Don't start async tasks until we know everything is healthy. - */ - spa_async_suspend(spa); +spa_activate(spa, mode); - zpool_get_load_policy(config, &policy); - if (policy.zlp_rewind & ZPOOL_DO_REWIND) - state = SPA_LOAD_RECOVER; +/* + * Don't start async tasks until we know everything is healthy. 
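
The ZFS_IMPORT_VERBATIM branch above trusts the supplied config outright: persist the cachefile, post the import event, and return before any on-disk validation runs. The shape of that control flow, as a sketch (the flag value and helper are hypothetical):

#include <stdint.h>

#define IMPORT_VERBATIM	(1u << 0)	/* illustrative flag bit */

static int
load_and_validate(void)		/* stand-in for the full load path */
{
	return (0);
}

static int
import_pool(uint64_t flags)
{
	if (flags & IMPORT_VERBATIM) {
		/* write cachefile, post import event, done */
		return (0);
	}
	/* otherwise: suspend async tasks, apply rewind policy, spa_load */
	return (load_and_validate());
}
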
+ */ +spa_async_suspend(spa); - spa->spa_config_source = SPA_CONFIG_SRC_TRYIMPORT; +zpool_get_load_policy(config, &policy); +if (policy.zlp_rewind & ZPOOL_DO_REWIND) + state = SPA_LOAD_RECOVER; - if (state != SPA_LOAD_RECOVER) { - spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; - zfs_dbgmsg("spa_import: importing %s", pool); - } else { - zfs_dbgmsg("spa_import: importing %s, max_txg=%lld " - "(RECOVERY MODE)", pool, (longlong_t)policy.zlp_txg); - } - error = spa_load_best(spa, state, policy.zlp_txg, policy.zlp_rewind); +spa->spa_config_source = SPA_CONFIG_SRC_TRYIMPORT; - /* - * Propagate anything learned while loading the pool and pass it - * back to caller (i.e. rewind info, missing devices, etc). - */ - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); +if (state != SPA_LOAD_RECOVER) { + spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; + zfs_dbgmsg("spa_import: importing %s", pool); +} else { + zfs_dbgmsg("spa_import: importing %s, max_txg=%lld " + "(RECOVERY MODE)", pool, (longlong_t)policy.zlp_txg); +} +error = spa_load_best(spa, state, policy.zlp_txg, policy.zlp_rewind); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - /* - * Toss any existing sparelist, as it doesn't have any validity - * anymore, and conflicts with spa_has_spare(). - */ - if (spa->spa_spares.sav_config) { - nvlist_free(spa->spa_spares.sav_config); - spa->spa_spares.sav_config = NULL; - spa_load_spares(spa); - } - if (spa->spa_l2cache.sav_config) { - nvlist_free(spa->spa_l2cache.sav_config); - spa->spa_l2cache.sav_config = NULL; - spa_load_l2cache(spa); - } +/* + * Propagate anything learned while loading the pool and pass it + * back to caller (i.e. rewind info, missing devices, etc). + */ +VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); - VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - spa_config_exit(spa, SCL_ALL, FTAG); +spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); +/* + * Toss any existing sparelist, as it doesn't have any validity + * anymore, and conflicts with spa_has_spare(). + */ +if (spa->spa_spares.sav_config) { + nvlist_free(spa->spa_spares.sav_config); + spa->spa_spares.sav_config = NULL; + spa_load_spares(spa); +} +if (spa->spa_l2cache.sav_config) { + nvlist_free(spa->spa_l2cache.sav_config); + spa->spa_l2cache.sav_config = NULL; + spa_load_l2cache(spa); +} - if (props != NULL) - spa_configfile_set(spa, props, B_FALSE); +VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); +spa_config_exit(spa, SCL_ALL, FTAG); - if (error != 0 || (props && spa_writeable(spa) && - (error = spa_prop_set(spa, props)))) { - spa_unload(spa); - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (error); - } +if (props != NULL) + spa_configfile_set(spa, props, B_FALSE); - spa_async_resume(spa); +if (error != 0 || (props && spa_writeable(spa) && + (error = spa_prop_set(spa, props)))) { + spa_unload(spa); + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); +} - /* - * Override any spares and level 2 cache devices as specified by - * the user, as these may have correct device names/devids, etc. 
- */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - if (spa->spa_spares.sav_config) - VERIFY(nvlist_remove(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); - else - VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, - ZPOOL_CONFIG_SPARES, spares, nspares) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_spares(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_spares.sav_sync = B_TRUE; - } - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) { - if (spa->spa_l2cache.sav_config) - VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); - else - VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, - ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa_load_l2cache(spa); - spa_config_exit(spa, SCL_ALL, FTAG); - spa->spa_l2cache.sav_sync = B_TRUE; - } +spa_async_resume(spa); - /* - * Check for any removed devices. - */ - if (spa->spa_autoreplace) { - spa_aux_check_removed(&spa->spa_spares); - spa_aux_check_removed(&spa->spa_l2cache); - } +/* + * Override any spares and level 2 cache devices as specified by + * the user, as these may have correct device names/devids, etc. + */ +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + if (spa->spa_spares.sav_config) + VERIFY(nvlist_remove(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_spares(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_spares.sav_sync = B_TRUE; +} +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + if (spa->spa_l2cache.sav_config) + VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, + ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa_load_l2cache(spa); + spa_config_exit(spa, SCL_ALL, FTAG); + spa->spa_l2cache.sav_sync = B_TRUE; +} - if (spa_writeable(spa)) { - /* - * Update the config cache to include the newly-imported pool. - */ - spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); - } +/* + * Check for any removed devices. + */ +if (spa->spa_autoreplace) { + spa_aux_check_removed(&spa->spa_spares); + spa_aux_check_removed(&spa->spa_l2cache); +} +if (spa_writeable(spa)) { /* - * It's possible that the pool was expanded while it was exported. - * We kick off an async task to handle this for us. + * Update the config cache to include the newly-imported pool. */ - spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); + spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); +} + +/* + * It's possible that the pool was expanded while it was exported. + * We kick off an async task to handle this for us. 
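
On import, any sparelist or l2cache list remembered in the config is discarded in favor of the caller's, since entries carried over from an exported pool may hold stale device paths. The remove-then-add idiom distilled against libnvpair (assumed userland API; error checks elided):

#include <libnvpair.h>

static void
override_spares(nvlist_t *sav_config, nvlist_t **spares, uint_t nspares)
{
	/* drop any stale array, then install the user-supplied one */
	(void) nvlist_remove(sav_config, "spares", DATA_TYPE_NVLIST_ARRAY);
	(void) nvlist_add_nvlist_array(sav_config, "spares", spares, nspares);
}
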
+ */ +spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); - spa_history_log_version(spa, "import", NULL); +spa_history_log_version(spa, "import", NULL); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); +spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); - zvol_create_minors(spa, pool, B_TRUE); +zvol_create_minors(spa, pool, B_TRUE); - mutex_exit(&spa_namespace_lock); +mutex_exit(&spa_namespace_lock); - return (0); +return (0); } nvlist_t * spa_tryimport(nvlist_t *tryconfig) { - nvlist_t *config = NULL; - char *poolname, *cachefile; - spa_t *spa; - uint64_t state; - int error; - zpool_load_policy_t policy; +nvlist_t *config = NULL; +char *poolname, *cachefile; +spa_t *spa; +uint64_t state; +int error; +zpool_load_policy_t policy; - if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) - return (NULL); +if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) + return (NULL); - if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) - return (NULL); +if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) + return (NULL); - /* - * Create and initialize the spa structure. - */ - mutex_enter(&spa_namespace_lock); - spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); - spa_activate(spa, FREAD); +/* + * Create and initialize the spa structure. + */ +mutex_enter(&spa_namespace_lock); +spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); +spa_activate(spa, FREAD); - /* - * Rewind pool if a max txg was provided. - */ - zpool_get_load_policy(spa->spa_config, &policy); - if (policy.zlp_txg != UINT64_MAX) { - spa->spa_load_max_txg = policy.zlp_txg; - spa->spa_extreme_rewind = B_TRUE; - zfs_dbgmsg("spa_tryimport: importing %s, max_txg=%lld", - poolname, (longlong_t)policy.zlp_txg); - } else { - zfs_dbgmsg("spa_tryimport: importing %s", poolname); - } +/* + * Rewind pool if a max txg was provided. + */ +zpool_get_load_policy(spa->spa_config, &policy); +if (policy.zlp_txg != UINT64_MAX) { + spa->spa_load_max_txg = policy.zlp_txg; + spa->spa_extreme_rewind = B_TRUE; + zfs_dbgmsg("spa_tryimport: importing %s, max_txg=%lld", + poolname, (longlong_t)policy.zlp_txg); +} else { + zfs_dbgmsg("spa_tryimport: importing %s", poolname); +} - if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_CACHEFILE, &cachefile) - == 0) { - zfs_dbgmsg("spa_tryimport: using cachefile '%s'", cachefile); - spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; - } else { - spa->spa_config_source = SPA_CONFIG_SRC_SCAN; - } +if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_CACHEFILE, &cachefile) + == 0) { + zfs_dbgmsg("spa_tryimport: using cachefile '%s'", cachefile); + spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; +} else { + spa->spa_config_source = SPA_CONFIG_SRC_SCAN; +} - error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); +error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); + +/* + * If 'tryconfig' was at least parsable, return the current config. + */ +if (spa->spa_root_vdev != NULL) { + config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); + VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, + poolname) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, + state) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, + spa->spa_uberblock.ub_timestamp) == 0); + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + spa->spa_load_info) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, + spa->spa_errata) == 0); /* - * If 'tryconfig' was at least parsable, return the current config. 
+ * If the bootfs property exists on this pool then we + * copy it out so that external consumers can tell which + * pools are bootable. */ - if (spa->spa_root_vdev != NULL) { - config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); - VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, - poolname) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, - state) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, - spa->spa_uberblock.ub_timestamp) == 0); - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, - spa->spa_errata) == 0); + if ((!error || error == EEXIST) && spa->spa_bootfs) { + char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* - * If the bootfs property exists on this pool then we - * copy it out so that external consumers can tell which - * pools are bootable. + * We have to play games with the name since the + * pool was opened as TRYIMPORT_NAME. */ - if ((!error || error == EEXIST) && spa->spa_bootfs) { - char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + if (dsl_dsobj_to_dsname(spa_name(spa), + spa->spa_bootfs, tmpname) == 0) { + char *cp; + char *dsname; - /* - * We have to play games with the name since the - * pool was opened as TRYIMPORT_NAME. - */ - if (dsl_dsobj_to_dsname(spa_name(spa), - spa->spa_bootfs, tmpname) == 0) { - char *cp; - char *dsname; - - dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - cp = strchr(tmpname, '/'); - if (cp == NULL) { - (void) strlcpy(dsname, tmpname, - MAXPATHLEN); - } else { - (void) snprintf(dsname, MAXPATHLEN, - "%s/%s", poolname, ++cp); - } - VERIFY(nvlist_add_string(config, - ZPOOL_CONFIG_BOOTFS, dsname) == 0); - kmem_free(dsname, MAXPATHLEN); + dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + cp = strchr(tmpname, '/'); + if (cp == NULL) { + (void) strlcpy(dsname, tmpname, + MAXPATHLEN); + } else { + (void) snprintf(dsname, MAXPATHLEN, + "%s/%s", poolname, ++cp); } - kmem_free(tmpname, MAXPATHLEN); + VERIFY(nvlist_add_string(config, + ZPOOL_CONFIG_BOOTFS, dsname) == 0); + kmem_free(dsname, MAXPATHLEN); } - - /* - * Add the list of hot spares and level 2 cache devices. - */ - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - spa_add_spares(spa, config); - spa_add_l2cache(spa, config); - spa_config_exit(spa, SCL_CONFIG, FTAG); + kmem_free(tmpname, MAXPATHLEN); } - spa_unload(spa); - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); + /* + * Add the list of hot spares and level 2 cache devices. + */ + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_add_spares(spa, config); + spa_add_l2cache(spa, config); + spa_config_exit(spa, SCL_CONFIG, FTAG); +} - return (config); +spa_unload(spa); +spa_deactivate(spa); +spa_remove(spa); +mutex_exit(&spa_namespace_lock); + +return (config); } /* - * Pool export/destroy - * - * The act of destroying or exporting a pool is very simple. We make sure there - * is no more pending I/O and any references to the pool are gone. Then, we - * update the pool state and sync all the labels to disk, removing the - * configuration from the cache afterwards. If the 'hardforce' flag is set, then - * we don't sync the labels or remove the configuration cache. - */ +* Pool export/destroy +* +* The act of destroying or exporting a pool is very simple. We make sure there +* is no more pending I/O and any references to the pool are gone. Then, we +* update the pool state and sync all the labels to disk, removing the +* configuration from the cache afterwards. 
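
The bootfs fix-up above exists because a tryimport opens the pool under the internal TRYIMPORT_NAME, so a dataset name resolved from the bootfs objset must be re-rooted at the requested pool name. Distilled into a runnable sketch:

#include <stdio.h>
#include <string.h>

static void
reroot_dsname(char *out, size_t len, const char *tmpname,
    const char *poolname)
{
	const char *slash = strchr(tmpname, '/');

	if (slash == NULL)	/* bootfs is the pool root dataset */
		(void) snprintf(out, len, "%s", tmpname);
	else			/* keep everything after the first '/' */
		(void) snprintf(out, len, "%s/%s", poolname, slash + 1);
}

int
main(void)
{
	char buf[256];

	reroot_dsname(buf, sizeof (buf), "$import/ROOT/default", "tank");
	puts(buf);	/* tank/ROOT/default */
	return (0);
}
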
If the 'hardforce' flag is set, then +* we don't sync the labels or remove the configuration cache. +*/ static int spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, - boolean_t force, boolean_t hardforce) +boolean_t force, boolean_t hardforce) { - spa_t *spa; +spa_t *spa; - if (oldconfig) - *oldconfig = NULL; +if (oldconfig) + *oldconfig = NULL; - if (!(spa_mode_global & FWRITE)) - return (SET_ERROR(EROFS)); +if (!(spa_mode_global & FWRITE)) + return (SET_ERROR(EROFS)); - mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(pool)) == NULL) { - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOENT)); - } +mutex_enter(&spa_namespace_lock); +if ((spa = spa_lookup(pool)) == NULL) { + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOENT)); +} - /* - * Put a hold on the pool, drop the namespace lock, stop async tasks, - * reacquire the namespace lock, and see if we can export. - */ - spa_open_ref(spa, FTAG); +if (spa->spa_is_exporting) { + /* the pool is being exported by another thread */ mutex_exit(&spa_namespace_lock); - spa_async_suspend(spa); - if (spa->spa_zvol_taskq) { - zvol_remove_minors(spa, spa_name(spa), B_TRUE); - taskq_wait(spa->spa_zvol_taskq); - } - mutex_enter(&spa_namespace_lock); - spa_close(spa, FTAG); + return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS)); +} +spa->spa_is_exporting = B_TRUE; - if (spa->spa_state == POOL_STATE_UNINITIALIZED) - goto export_spa; - /* - * The pool will be in core if it's openable, in which case we can - * modify its state. Objsets may be open only because they're dirty, - * so we have to force it to sync before checking spa_refcnt. - */ - if (spa->spa_sync_on) { - txg_wait_synced(spa->spa_dsl_pool, 0); - spa_evicting_os_wait(spa); - } +/* + * Put a hold on the pool, drop the namespace lock, stop async tasks, + * reacquire the namespace lock, and see if we can export. + */ +spa_open_ref(spa, FTAG); +mutex_exit(&spa_namespace_lock); +spa_async_suspend(spa); +if (spa->spa_zvol_taskq) { + zvol_remove_minors(spa, spa_name(spa), B_TRUE); + taskq_wait(spa->spa_zvol_taskq); +} +mutex_enter(&spa_namespace_lock); +spa_close(spa, FTAG); + +if (spa->spa_state == POOL_STATE_UNINITIALIZED) + goto export_spa; +/* + * The pool will be in core if it's openable, in which case we can + * modify its state. Objsets may be open only because they're dirty, + * so we have to force it to sync before checking spa_refcnt. + */ +if (spa->spa_sync_on) { + txg_wait_synced(spa->spa_dsl_pool, 0); + spa_evicting_os_wait(spa); +} + +/* + * A pool cannot be exported or destroyed if there are active + * references. If we are resetting a pool, allow references by + * fault injection handlers. + */ +if (!spa_refcount_zero(spa) || + (spa->spa_inject_ref != 0 && + new_state != POOL_STATE_UNINITIALIZED)) { + spa_async_resume(spa); + spa->spa_is_exporting = B_FALSE; + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EBUSY)); +} +if (spa->spa_sync_on) { /* - * A pool cannot be exported or destroyed if there are active - * references. If we are resetting a pool, allow references by - * fault injection handlers. + * A pool cannot be exported if it has an active shared spare. + * This is to prevent other pools stealing the active spare + * from an exported pool. At user's own will, such pool can + * be forcedly exported. 
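The spa_is_exporting gate introduced above is a classic test-and-set under a lock: a second exporter fails fast with ZFS_ERR_EXPORT_IN_PROGRESS instead of racing the first. A pthreads sketch of the shape (EBUSY stands in for the ZFS error code; names are illustrative):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;
static bool exporting;

static int
export_begin(void)
{
	(void) pthread_mutex_lock(&ns_lock);
	if (exporting) {
		(void) pthread_mutex_unlock(&ns_lock);
		return (EBUSY);		/* another thread owns the export */
	}
	exporting = true;		/* claimed; visible under the lock */
	(void) pthread_mutex_unlock(&ns_lock);
	return (0);
}

static void
export_end(void)
{
	(void) pthread_mutex_lock(&ns_lock);
	exporting = false;	/* a reused spa_t must be exportable again */
	(void) pthread_mutex_unlock(&ns_lock);
}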
*/ - if (!spa_refcount_zero(spa) || - (spa->spa_inject_ref != 0 && - new_state != POOL_STATE_UNINITIALIZED)) { + if (!force && new_state == POOL_STATE_EXPORTED && + spa_has_active_shared_spare(spa)) { spa_async_resume(spa); + spa->spa_is_exporting = B_FALSE; mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EBUSY)); + return (SET_ERROR(EXDEV)); } - if (spa->spa_sync_on) { - /* - * A pool cannot be exported if it has an active shared spare. - * This is to prevent other pools stealing the active spare - * from an exported pool. At user's own will, such pool can - * be forcedly exported. - */ - if (!force && new_state == POOL_STATE_EXPORTED && - spa_has_active_shared_spare(spa)) { - spa_async_resume(spa); - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(EXDEV)); - } - - /* - * We're about to export or destroy this pool. Make sure - * we stop all initialization and trim activity here before - * we set the spa_final_txg. This will ensure that all - * dirty data resulting from the initialization is - * committed to disk before we unload the pool. - */ - if (spa->spa_root_vdev != NULL) { - vdev_t *rvd = spa->spa_root_vdev; - vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE); - vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE); - vdev_autotrim_stop_all(spa); - } + /* + * We're about to export or destroy this pool. Make sure + * we stop all initialization and trim activity here before + * we set the spa_final_txg. This will ensure that all + * dirty data resulting from the initialization is + * committed to disk before we unload the pool. + */ + if (spa->spa_root_vdev != NULL) { + vdev_t *rvd = spa->spa_root_vdev; + vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE); + vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE); + vdev_autotrim_stop_all(spa); + } - /* - * We want this to be reflected on every label, - * so mark them all dirty. spa_unload() will do the - * final sync that pushes these changes out. - */ - if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - spa->spa_state = new_state; - spa->spa_final_txg = spa_last_synced_txg(spa) + - TXG_DEFER_SIZE + 1; - vdev_config_dirty(spa->spa_root_vdev); - spa_config_exit(spa, SCL_ALL, FTAG); - } + /* + * We want this to be reflected on every label, + * so mark them all dirty. spa_unload() will do the + * final sync that pushes these changes out. 
+ */ + if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + spa->spa_state = new_state; + spa->spa_final_txg = spa_last_synced_txg(spa) + + TXG_DEFER_SIZE + 1; + vdev_config_dirty(spa->spa_root_vdev); + spa_config_exit(spa, SCL_ALL, FTAG); } +} export_spa: - if (new_state == POOL_STATE_DESTROYED) - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY); - else if (new_state == POOL_STATE_EXPORTED) - spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_EXPORT); +if (new_state == POOL_STATE_DESTROYED) + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY); +else if (new_state == POOL_STATE_EXPORTED) + spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_EXPORT); - if (spa->spa_state != POOL_STATE_UNINITIALIZED) { - spa_unload(spa); - spa_deactivate(spa); - } +if (spa->spa_state != POOL_STATE_UNINITIALIZED) { + spa_unload(spa); + spa_deactivate(spa); +} - if (oldconfig && spa->spa_config) - VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); +if (oldconfig && spa->spa_config) + VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); - if (new_state != POOL_STATE_UNINITIALIZED) { - if (!hardforce) - spa_write_cachefile(spa, B_TRUE, B_TRUE); - spa_remove(spa); - } - mutex_exit(&spa_namespace_lock); +if (new_state != POOL_STATE_UNINITIALIZED) { + if (!hardforce) + spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_remove(spa); +} else { + /* + * If spa_remove() is not called for this spa_t and + * there is any possibility that it can be reused, + * we make sure to reset the exporting flag. + */ + spa->spa_is_exporting = B_FALSE; +} - return (0); +mutex_exit(&spa_namespace_lock); +return (0); } /* - * Destroy a storage pool. - */ +* Destroy a storage pool. +*/ int spa_destroy(char *pool) { - return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, - B_FALSE, B_FALSE)); +return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, + B_FALSE, B_FALSE)); } /* - * Export a storage pool. - */ +* Export a storage pool. +*/ int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, - boolean_t hardforce) +boolean_t hardforce) { - return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, - force, hardforce)); +return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, + force, hardforce)); } /* - * Similar to spa_export(), this unloads the spa_t without actually removing it - * from the namespace in any way. - */ +* Similar to spa_export(), this unloads the spa_t without actually removing it +* from the namespace in any way. +*/ int spa_reset(char *pool) { - return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, - B_FALSE, B_FALSE)); +return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, + B_FALSE, B_FALSE)); } /* - * ========================================================================== - * Device manipulation - * ========================================================================== - */ +* ========================================================================== +* Device manipulation +* ========================================================================== +*/ /* - * Add a device to a storage pool. - */ +* Add a device to a storage pool. 
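spa_destroy(), spa_export(), and spa_reset() above are thin wrappers that differ only in the target state and force flags passed to spa_export_common(). A sketch of that worker-plus-wrappers shape, with illustrative names:

enum target_state { ST_UNINIT, ST_EXPORTED, ST_DESTROYED };

/* One worker owns all the teardown logic ... */
static int
teardown_common(const char *pool, enum target_state st, int force,
    int hardforce)
{
	/* validate, quiesce I/O, sync labels, drop cached config ... */
	(void) pool; (void) st; (void) force; (void) hardforce;
	return (0);
}

/* ... and the public entry points only choose the policy. */
static int
pool_destroy(const char *pool)
{
	return (teardown_common(pool, ST_DESTROYED, 0, 0));
}

static int
pool_export(const char *pool, int force, int hardforce)
{
	return (teardown_common(pool, ST_EXPORTED, force, hardforce));
}

static int
pool_reset(const char *pool)
{
	return (teardown_common(pool, ST_UNINIT, 0, 0));
}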
+*/ int spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { - uint64_t txg, id; - int error; - vdev_t *rvd = spa->spa_root_vdev; - vdev_t *vd, *tvd; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - - ASSERT(spa_writeable(spa)); +uint64_t txg, id; +int error; +vdev_t *rvd = spa->spa_root_vdev; +vdev_t *vd, *tvd; +nvlist_t **spares, **l2cache; +uint_t nspares, nl2cache; - txg = spa_vdev_enter(spa); +ASSERT(spa_writeable(spa)); - if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, - VDEV_ALLOC_ADD)) != 0) - return (spa_vdev_exit(spa, NULL, txg, error)); +txg = spa_vdev_enter(spa); - spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ +if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, + VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, NULL, txg, error)); - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, - &nspares) != 0) - nspares = 0; +spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, - &nl2cache) != 0) - nl2cache = 0; +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, + &nspares) != 0) + nspares = 0; - if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) - return (spa_vdev_exit(spa, vd, txg, EINVAL)); +if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, + &nl2cache) != 0) + nl2cache = 0; - if (vd->vdev_children != 0 && - (error = vdev_create(vd, txg, B_FALSE)) != 0) - return (spa_vdev_exit(spa, vd, txg, error)); +if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) + return (spa_vdev_exit(spa, vd, txg, EINVAL)); - /* - * We must validate the spares and l2cache devices after checking the - * children. Otherwise, vdev_inuse() will blindly overwrite the spare. - */ - if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) - return (spa_vdev_exit(spa, vd, txg, error)); +if (vd->vdev_children != 0 && + (error = vdev_create(vd, txg, B_FALSE)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); - /* - * If we are in the middle of a device removal, we can only add - * devices which match the existing devices in the pool. - * If we are in the middle of a removal, or have some indirect - * vdevs, we can not add raidz toplevels. - */ - if (spa->spa_vdev_removal != NULL || - spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { - for (int c = 0; c < vd->vdev_children; c++) { - tvd = vd->vdev_child[c]; - if (spa->spa_vdev_removal != NULL && - tvd->vdev_ashift != spa->spa_max_ashift) { - return (spa_vdev_exit(spa, vd, txg, EINVAL)); - } - /* Fail if top level vdev is raidz */ - if (tvd->vdev_ops == &vdev_raidz_ops) { - return (spa_vdev_exit(spa, vd, txg, EINVAL)); - } - /* - * Need the top level mirror to be - * a mirror of leaf vdevs only - */ - if (tvd->vdev_ops == &vdev_mirror_ops) { - for (uint64_t cid = 0; - cid < tvd->vdev_children; cid++) { - vdev_t *cvd = tvd->vdev_child[cid]; - if (!cvd->vdev_ops->vdev_op_leaf) { - return (spa_vdev_exit(spa, vd, - txg, EINVAL)); - } - } - } - } - } +/* + * We must validate the spares and l2cache devices after checking the + * children. Otherwise, vdev_inuse() will blindly overwrite the spare. + */ +if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); +/* + * If we are in the middle of a device removal, we can only add + * devices which match the existing devices in the pool. + * If we are in the middle of a removal, or have some indirect + * vdevs, we can not add raidz toplevels. 
+ */ +if (spa->spa_vdev_removal != NULL || + spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { for (int c = 0; c < vd->vdev_children; c++) { - + tvd = vd->vdev_child[c]; + if (spa->spa_vdev_removal != NULL && + tvd->vdev_ashift != spa->spa_max_ashift) { + return (spa_vdev_exit(spa, vd, txg, EINVAL)); + } + /* Fail if top level vdev is raidz */ + if (tvd->vdev_ops == &vdev_raidz_ops) { + return (spa_vdev_exit(spa, vd, txg, EINVAL)); + } /* - * Set the vdev id to the first hole, if one exists. + * Need the top level mirror to be + * a mirror of leaf vdevs only */ - for (id = 0; id < rvd->vdev_children; id++) { - if (rvd->vdev_child[id]->vdev_ishole) { - vdev_free(rvd->vdev_child[id]); - break; + if (tvd->vdev_ops == &vdev_mirror_ops) { + for (uint64_t cid = 0; + cid < tvd->vdev_children; cid++) { + vdev_t *cvd = tvd->vdev_child[cid]; + if (!cvd->vdev_ops->vdev_op_leaf) { + return (spa_vdev_exit(spa, vd, + txg, EINVAL)); + } } } - tvd = vd->vdev_child[c]; - vdev_remove_child(vd, tvd); - tvd->vdev_id = id; - vdev_add_child(rvd, tvd); - vdev_config_dirty(tvd); - } - - if (nspares != 0) { - spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, - ZPOOL_CONFIG_SPARES); - spa_load_spares(spa); - spa->spa_spares.sav_sync = B_TRUE; } +} - if (nl2cache != 0) { - spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, - ZPOOL_CONFIG_L2CACHE); - spa_load_l2cache(spa); - spa->spa_l2cache.sav_sync = B_TRUE; - } +for (int c = 0; c < vd->vdev_children; c++) { /* - * We have to be careful when adding new vdevs to an existing pool. - * If other threads start allocating from these vdevs before we - * sync the config cache, and we lose power, then upon reboot we may - * fail to open the pool because there are DVAs that the config cache - * can't translate. Therefore, we first add the vdevs without - * initializing metaslabs; sync the config cache (via spa_vdev_exit()); - * and then let spa_config_update() initialize the new metaslabs. - * - * spa_load() checks for added-but-not-initialized vdevs, so that - * if we lose power at any point in this sequence, the remaining - * steps will be completed the next time we load the pool. + * Set the vdev id to the first hole, if one exists. */ - (void) spa_vdev_exit(spa, vd, txg, 0); + for (id = 0; id < rvd->vdev_children; id++) { + if (rvd->vdev_child[id]->vdev_ishole) { + vdev_free(rvd->vdev_child[id]); + break; + } + } + tvd = vd->vdev_child[c]; + vdev_remove_child(vd, tvd); + tvd->vdev_id = id; + vdev_add_child(rvd, tvd); + vdev_config_dirty(tvd); +} - mutex_enter(&spa_namespace_lock); - spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); - spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD); - mutex_exit(&spa_namespace_lock); +if (nspares != 0) { + spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, + ZPOOL_CONFIG_SPARES); + spa_load_spares(spa); + spa->spa_spares.sav_sync = B_TRUE; +} - return (0); +if (nl2cache != 0) { + spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, + ZPOOL_CONFIG_L2CACHE); + spa_load_l2cache(spa); + spa->spa_l2cache.sav_sync = B_TRUE; } /* - * Attach a device to a mirror. The arguments are the path to any device - * in the mirror, and the nvroot for the new device. If the path specifies - * a device that is not mirrored, we automatically insert the mirror vdev. + * We have to be careful when adding new vdevs to an existing pool. 
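The id-assignment loop above reuses the slot of the first hole vdev rather than always appending; when no hole exists the scan falls off the end and the new child lands at index n. A standalone sketch with a made-up slot type:

#include <stdbool.h>
#include <stddef.h>

struct slot {
	bool is_hole;
};

/*
 * Return the index of the first hole, or n (the append position) when
 * no hole exists -- mirroring how a new top-level vdev either fills a
 * hole or is appended after the last child.
 */
static size_t
first_free_slot(const struct slot *slots, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (slots[i].is_hole)
			return (i);
	}
	return (n);
}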
+ * If other threads start allocating from these vdevs before we + * sync the config cache, and we lose power, then upon reboot we may + * fail to open the pool because there are DVAs that the config cache + * can't translate. Therefore, we first add the vdevs without + * initializing metaslabs; sync the config cache (via spa_vdev_exit()); + * and then let spa_config_update() initialize the new metaslabs. * - * If 'replacing' is specified, the new device is intended to replace the - * existing device; in this case the two devices are made into their own - * mirror using the 'replacing' vdev, which is functionally identical to - * the mirror vdev (it actually reuses all the same ops) but has a few - * extra rules: you can't attach to it after it's been created, and upon - * completion of resilvering, the first disk (the one being replaced) - * is automatically detached. + * spa_load() checks for added-but-not-initialized vdevs, so that + * if we lose power at any point in this sequence, the remaining + * steps will be completed the next time we load the pool. */ +(void) spa_vdev_exit(spa, vd, txg, 0); + +mutex_enter(&spa_namespace_lock); +spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); +spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD); +mutex_exit(&spa_namespace_lock); + +return (0); +} + +/* +* Attach a device to a mirror. The arguments are the path to any device +* in the mirror, and the nvroot for the new device. If the path specifies +* a device that is not mirrored, we automatically insert the mirror vdev. +* +* If 'replacing' is specified, the new device is intended to replace the +* existing device; in this case the two devices are made into their own +* mirror using the 'replacing' vdev, which is functionally identical to +* the mirror vdev (it actually reuses all the same ops) but has a few +* extra rules: you can't attach to it after it's been created, and upon +* completion of resilvering, the first disk (the one being replaced) +* is automatically detached. +*/ int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) { - uint64_t txg, dtl_max_txg; - ASSERTV(vdev_t *rvd = spa->spa_root_vdev); - vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; - vdev_ops_t *pvops; - char *oldvdpath, *newvdpath; - int newvd_isspare = B_FALSE; - int error; - boolean_t raidz = B_FALSE; +uint64_t txg, dtl_max_txg; +ASSERTV(vdev_t *rvd = spa->spa_root_vdev); +vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; +vdev_ops_t *pvops; +char *oldvdpath, *newvdpath; +int newvd_isspare = B_FALSE; +int error; +boolean_t raidz = B_FALSE; - ASSERT(spa_writeable(spa)); +ASSERT(spa_writeable(spa)); - txg = spa_vdev_enter(spa); +txg = spa_vdev_enter(spa); - oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); +oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { - error = (spa_has_checkpoint(spa)) ? - ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; - return (spa_vdev_exit(spa, NULL, txg, error)); - } +ASSERT(MUTEX_HELD(&spa_namespace_lock)); +if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + error = (spa_has_checkpoint(spa)) ? 
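The ordering spelled out above -- record the vdev in the config, sync, and only then initialize metaslabs -- is a small two-phase commit for power-loss safety. A schematic sketch of the recovery-friendly shape (the flags stand in for on-disk state):

#include <stdbool.h>

struct pending_dev {
	bool in_config;		/* phase 1: durably recorded */
	bool metaslabs_ready;	/* phase 2: allocatable */
};

/* Phase 1: make the device durable first; losing power here is safe. */
static void
phase1_commit(struct pending_dev *d)
{
	d->in_config = true;	/* imagine this synced to stable storage */
}

/* Phase 2: only now may allocators hand out space on it. */
static void
phase2_activate(struct pending_dev *d)
{
	d->metaslabs_ready = true;
}

/* On load, finish any device that crashed between the phases. */
static void
recover(struct pending_dev *d)
{
	if (d->in_config && !d->metaslabs_ready)
		phase2_activate(d);
}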
+ ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (spa_vdev_exit(spa, NULL, txg, error)); +} - if (spa->spa_vdev_removal != NULL) - return (spa_vdev_exit(spa, NULL, txg, EBUSY)); +if (spa->spa_vdev_removal != NULL) + return (spa_vdev_exit(spa, NULL, txg, EBUSY)); - if (oldvd == NULL) - return (spa_vdev_exit(spa, NULL, txg, ENODEV)); +if (oldvd == NULL) + return (spa_vdev_exit(spa, NULL, txg, ENODEV)); - if (oldvd->vdev_ops == &vdev_raidz_ops) { - raidz = B_TRUE; - } else if (!oldvd->vdev_ops->vdev_op_leaf) { - return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); - } +if (oldvd->vdev_ops == &vdev_raidz_ops) { + raidz = B_TRUE; +} else if (!oldvd->vdev_ops->vdev_op_leaf) { + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); +} - if (raidz) - pvd = oldvd; - else - pvd = oldvd->vdev_parent; +if (raidz) + pvd = oldvd; +else + pvd = oldvd->vdev_parent; - if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, - VDEV_ALLOC_ATTACH)) != 0) - return (spa_vdev_exit(spa, NULL, txg, EINVAL)); +if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, + VDEV_ALLOC_ATTACH)) != 0) + return (spa_vdev_exit(spa, NULL, txg, EINVAL)); + +if (newrootvd->vdev_children != 1) + return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); - if (newrootvd->vdev_children != 1) - return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); +newvd = newrootvd->vdev_child[0]; - newvd = newrootvd->vdev_child[0]; +if (!newvd->vdev_ops->vdev_op_leaf) + return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); - if (!newvd->vdev_ops->vdev_op_leaf) - return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); +if ((error = vdev_create(newrootvd, txg, replacing)) != 0) + return (spa_vdev_exit(spa, newrootvd, txg, error)); - if ((error = vdev_create(newrootvd, txg, replacing)) != 0) - return (spa_vdev_exit(spa, newrootvd, txg, error)); +/* + * Spares can't replace logs + */ +if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); +if (!replacing) { /* - * Spares can't replace logs + * For attach, the only allowable parent is a mirror or the root + * vdev. */ - if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) + if (pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_raidz_ops && + pvd->vdev_ops != &vdev_root_ops) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - if (!replacing) { - /* - * For attach, the only allowable parent is a mirror or the root - * vdev. - */ - if (pvd->vdev_ops != &vdev_mirror_ops && - pvd->vdev_ops != &vdev_raidz_ops && - pvd->vdev_ops != &vdev_root_ops) - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - - pvops = &vdev_mirror_ops; - } else { - /* - * Active hot spares can only be replaced by inactive hot - * spares. - */ - if (pvd->vdev_ops == &vdev_spare_ops && - oldvd->vdev_isspare && - !spa_has_spare(spa, newvd->vdev_guid)) - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - - /* - * If the source is a hot spare, and the parent isn't already a - * spare, then we want to create a new hot spare. Otherwise, we - * want to create a replacing vdev. The user is not allowed to - * attach to a spared vdev child unless the 'isspare' state is - * the same (spare replaces spare, non-spare replaces - * non-spare). 
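The attach/replace rules above reduce to a small decision: a plain attach always builds a mirror, while a replace builds a spare parent when the incoming device is a configured hot spare and a replacing parent otherwise. Condensed into a sketch:

#include <stdbool.h>

enum pvd_kind { PV_MIRROR, PV_REPLACING, PV_SPARE };

/*
 * Choose the parent vdev ops for an attach or replace, per the
 * rules enforced above.
 */
static enum pvd_kind
choose_parent_ops(bool replacing, bool newvd_isspare)
{
	if (!replacing)
		return (PV_MIRROR);
	return (newvd_isspare ? PV_SPARE : PV_REPLACING);
}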
- */ - if (pvd->vdev_ops == &vdev_replacing_ops && - spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - } else if (pvd->vdev_ops == &vdev_spare_ops && - newvd->vdev_isspare != oldvd->vdev_isspare) { - return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - } - - if (newvd->vdev_isspare) - pvops = &vdev_spare_ops; - else - pvops = &vdev_replacing_ops; - } - + pvops = &vdev_mirror_ops; +} else { /* - * Make sure the new device is big enough. + * Active hot spares can only be replaced by inactive hot + * spares. */ - vdev_t *min_vdev = raidz ? oldvd->vdev_child[0] : oldvd; - if (newvd->vdev_asize < vdev_get_min_asize(min_vdev)) - return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); + if (pvd->vdev_ops == &vdev_spare_ops && + oldvd->vdev_isspare && + !spa_has_spare(spa, newvd->vdev_guid)) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); /* - * The new device cannot have a higher alignment requirement - * than the top-level vdev. + * If the source is a hot spare, and the parent isn't already a + * spare, then we want to create a new hot spare. Otherwise, we + * want to create a replacing vdev. The user is not allowed to + * attach to a spared vdev child unless the 'isspare' state is + * the same (spare replaces spare, non-spare replaces + * non-spare). */ - if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) - return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); - - if (raidz) { - oldvdpath = kmem_asprintf("raidz%u-%u", - oldvd->vdev_nparity, oldvd->vdev_id); - } else { - oldvdpath = spa_strdup(oldvd->vdev_path); + if (pvd->vdev_ops == &vdev_replacing_ops && + spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + } else if (pvd->vdev_ops == &vdev_spare_ops && + newvd->vdev_isspare != oldvd->vdev_isspare) { + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } - newvdpath = spa_strdup(newvd->vdev_path); - /* - * If this is an in-place replacement, update oldvd's path and devid - * to make it distinguishable from newvd, and unopenable from now on. - */ - if (strcmp(oldvdpath, newvdpath) == 0) { - spa_strfree(oldvd->vdev_path); - oldvd->vdev_path = kmem_alloc(strlen(newvdpath) + 5, - KM_SLEEP); - (void) sprintf(oldvd->vdev_path, "%s/old", - newvdpath); - if (oldvd->vdev_devid != NULL) { - spa_strfree(oldvd->vdev_devid); - oldvd->vdev_devid = NULL; - } - spa_strfree(oldvdpath); - oldvdpath = spa_strdup(oldvd->vdev_path); + if (newvd->vdev_isspare) + pvops = &vdev_spare_ops; + else + pvops = &vdev_replacing_ops; +} + +/* + * Make sure the new device is big enough. + */ +vdev_t *min_vdev = raidz ? oldvd->vdev_child[0] : oldvd; +if (newvd->vdev_asize < vdev_get_min_asize(min_vdev)) + return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); + +/* + * The new device cannot have a higher alignment requirement + * than the top-level vdev. + */ +if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) + return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); + +if (raidz) { + oldvdpath = kmem_asprintf("raidz%u-%u", + oldvd->vdev_nparity, oldvd->vdev_id); +} else { + oldvdpath = spa_strdup(oldvd->vdev_path); +} +newvdpath = spa_strdup(newvd->vdev_path); + +/* + * If this is an in-place replacement, update oldvd's path and devid + * to make it distinguishable from newvd, and unopenable from now on. 
+ */ +if (strcmp(oldvdpath, newvdpath) == 0) { + spa_strfree(oldvd->vdev_path); + oldvd->vdev_path = kmem_alloc(strlen(newvdpath) + 5, + KM_SLEEP); + (void) sprintf(oldvd->vdev_path, "%s/old", + newvdpath); + if (oldvd->vdev_devid != NULL) { + spa_strfree(oldvd->vdev_devid); + oldvd->vdev_devid = NULL; } + spa_strfree(oldvdpath); + oldvdpath = spa_strdup(oldvd->vdev_path); +} - /* mark the device being resilvered */ - if (!raidz) - newvd->vdev_resilver_txg = txg; +/* mark the device being resilvered */ +if (!raidz) + newvd->vdev_resilver_txg = txg; - /* - * If the parent is not a mirror, or if we're replacing, insert the new - * mirror/replacing/spare vdev above oldvd. - */ - if (!raidz && pvd->vdev_ops != pvops) - pvd = vdev_add_parent(oldvd, pvops); +/* + * If the parent is not a mirror, or if we're replacing, insert the new + * mirror/replacing/spare vdev above oldvd. + */ +if (!raidz && pvd->vdev_ops != pvops) + pvd = vdev_add_parent(oldvd, pvops); - ASSERT(pvd->vdev_top->vdev_parent == rvd); +ASSERT(pvd->vdev_top->vdev_parent == rvd); #if 0 - ASSERT(pvd->vdev_ops == pvops); - ASSERT(oldvd->vdev_parent == pvd); +ASSERT(pvd->vdev_ops == pvops); +ASSERT(oldvd->vdev_parent == pvd); #endif - /* - * Extract the new device from its root and add it to pvd. - */ - vdev_remove_child(newrootvd, newvd); - newvd->vdev_id = pvd->vdev_children; - newvd->vdev_crtxg = oldvd->vdev_crtxg; - vdev_add_child(pvd, newvd); +/* + * Extract the new device from its root and add it to pvd. + */ +vdev_remove_child(newrootvd, newvd); +newvd->vdev_id = pvd->vdev_children; +newvd->vdev_crtxg = oldvd->vdev_crtxg; +vdev_add_child(pvd, newvd); - /* - * Reevaluate the parent vdev state. - */ - vdev_propagate_state(pvd); +/* + * Reevaluate the parent vdev state. + */ +vdev_propagate_state(pvd); - tvd = newvd->vdev_top; - ASSERT(pvd->vdev_top == tvd); - ASSERT(tvd->vdev_parent == rvd); +tvd = newvd->vdev_top; +ASSERT(pvd->vdev_top == tvd); +ASSERT(tvd->vdev_parent == rvd); - vdev_config_dirty(tvd); +vdev_config_dirty(tvd); - /* - * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account - * for any dmu_sync-ed blocks. It will propagate upward when - * spa_vdev_exit() calls vdev_dtl_reassess(). - */ - dtl_max_txg = txg + TXG_CONCURRENT_STATES; +/* + * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account + * for any dmu_sync-ed blocks. It will propagate upward when + * spa_vdev_exit() calls vdev_dtl_reassess(). + */ +dtl_max_txg = txg + TXG_CONCURRENT_STATES; - if (raidz) { - dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); - dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_raidz_attach_sync, - newvd, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED, tx); - dmu_tx_commit(tx); - } else { - vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, - dtl_max_txg - TXG_INITIAL); +if (raidz) { + dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); + dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_raidz_attach_sync, + newvd, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED, tx); + dmu_tx_commit(tx); +} else { + vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, + dtl_max_txg - TXG_INITIAL); - if (newvd->vdev_isspare) { - spa_spare_activate(newvd); - spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE); - } + if (newvd->vdev_isspare) { + spa_spare_activate(newvd); + spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE); + } - newvd_isspare = newvd->vdev_isspare; + newvd_isspare = newvd->vdev_isspare; - /* - * Mark newvd's DTL dirty in this txg. - */ - vdev_dirty(tvd, VDD_DTL, newvd, txg); + /* + * Mark newvd's DTL dirty in this txg. 
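In the in-place replacement above, strlen(newvdpath) + 5 buys exactly the four bytes of "/old" plus the terminating NUL, and clearing the devid keeps the retired device from being reopened. A userland sketch of the rename, with a hypothetical helper name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Append "/old" so the retired device's path can never collide with
 * (or be reopened as) the replacement's path.
 */
static char *
make_old_path(const char *newpath)
{
	size_t len = strlen(newpath) + 5;	/* "/old" + NUL */
	char *old = malloc(len);

	if (old != NULL)
		(void) snprintf(old, len, "%s/old", newpath);
	return (old);
}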
+ */ + vdev_dirty(tvd, VDD_DTL, newvd, txg); - /* - * Schedule the resilver to restart in the future. We do this to - * ensure that dmu_sync-ed blocks have been stitched into the - * respective datasets. We do not do this if resilvers have been - * deferred. - */ - if (dsl_scan_resilvering(spa_get_dsl(spa)) && - spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, newvd); - else - dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); - } + /* + * Schedule the resilver to restart in the future. We do this to + * ensure that dmu_sync-ed blocks have been stitched into the + * respective datasets. We do not do this if resilvers have been + * deferred. + */ + if (dsl_scan_resilvering(spa_get_dsl(spa)) && + spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) + vdev_defer_resilver(newvd); + else + dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg); +} if (spa->spa_bootfs) spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH); @@ -7494,7 +7530,7 @@ spa_async_thread(void *arg) if (tasks & SPA_ASYNC_RESILVER && (!dsl_scan_resilvering(dp) || !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))) - dsl_resilver_restart(dp, 0); + dsl_scan_restart_resilver(dp, 0); if (tasks & SPA_ASYNC_INITIALIZE_RESTART) { mutex_enter(&spa_namespace_lock); @@ -7618,6 +7654,12 @@ spa_async_request(spa_t *spa, int task) mutex_exit(&spa->spa_async_lock); } +int +spa_async_tasks(spa_t *spa) +{ + return (spa->spa_async_tasks); +} + /* * ========================================================================== * SPA syncing routines @@ -7957,7 +7999,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx) case ZPOOL_PROP_READONLY: case ZPOOL_PROP_CACHEFILE: /* - * 'readonly' and 'cachefile' are also non-persisitent + * 'readonly' and 'cachefile' are also non-persistent * properties. */ break; @@ -8790,7 +8832,7 @@ EXPORT_SYMBOL(spa_inject_delref); EXPORT_SYMBOL(spa_scan_stat_init); EXPORT_SYMBOL(spa_scan_get_stats); -/* device maniion */ +/* device manipulation */ EXPORT_SYMBOL(spa_vdev_add); EXPORT_SYMBOL(spa_vdev_attach); EXPORT_SYMBOL(spa_vdev_detach); @@ -8829,9 +8871,11 @@ EXPORT_SYMBOL(spa_event_notify); #endif #if defined(_KERNEL) -module_param(spa_load_verify_maxinflight, int, 0644); -MODULE_PARM_DESC(spa_load_verify_maxinflight, - "Max concurrent traversal I/Os while verifying pool during import -X"); +/* BEGIN CSTYLED */ +module_param(spa_load_verify_shift, int, 0644); +MODULE_PARM_DESC(spa_load_verify_shift, "log2(fraction of arc that can " + "be used by inflight I/Os when verifying pool during import"); +/* END CSTYLED */ module_param(spa_load_verify_metadata, int, 0644); MODULE_PARM_DESC(spa_load_verify_metadata, diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c index d6f68ceda589..44711acef5a2 100644 --- a/module/zfs/spa_checkpoint.c +++ b/module/zfs/spa_checkpoint.c @@ -102,7 +102,7 @@ * Once the synctask is done and the discarding zthr is awake, we discard * the checkpointed data over multiple TXGs by having the zthr prefetching * entries from vdev_checkpoint_sm and then starting a synctask that places - * them as free blocks in to their respective ms_allocatable and ms_sm + * them as free blocks into their respective ms_allocatable and ms_sm * structures. 
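Per the module parameter change above, spa_load_verify_shift expresses the verification budget as a power-of-two fraction of the ARC; the kernel-side expression is presumably of the form arc_target >> shift, though the variable names here are an assumption. The idiom in isolation:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* A shift of 4 allows 1/16 of the budget to be in flight at once. */
static uint64_t
inflight_limit(uint64_t arc_target_bytes, int shift)
{
	return (arc_target_bytes >> shift);
}

int
main(void)
{
	/* 8 GiB ARC target, shift 4 -> 512 MiB of inflight verify I/O */
	(void) printf("%" PRIu64 "\n", inflight_limit(8ULL << 30, 4));
	return (0);
}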
* [see spa_checkpoint_discard_thread()] * diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 8616abda37bd..8c7c14999da6 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -93,8 +93,7 @@ spa_config_load(void) */ pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - (void) snprintf(pathname, MAXPATHLEN, "%s%s", - (rootdir != NULL) ? "./" : "", spa_config_path); + (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); file = kobj_open_file(pathname); @@ -458,7 +457,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment); - hostid = spa_get_hostid(); + hostid = spa_get_hostid(spa); if (hostid != 0) fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename); diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index e42f8a0212f6..fa5120eb61b3 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -31,7 +31,7 @@ * and the current log. All errors seen are logged to the current log. When a * scrub completes, the current log becomes the last log, the last log is thrown * out, and the current log is reinitialized. This way, if an error is somehow - * corrected, a new scrub will show that that it no longer exists, and will be + * corrected, a new scrub will show that it no longer exists, and will be * deleted from the log when the scrub completes. * * The log is stored using a ZAP object whose key is a string form of the diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index b590a1d57bd1..fa95d316073b 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -63,7 +63,7 @@ * overwrite the original creation of the pool. 'sh_phys_max_off' is the * physical ending offset in bytes of the log. This tells you the length of * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record - * is added, 'sh_eof' is incremented by the the size of the record. + * is added, 'sh_eof' is incremented by the size of the record. * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). * This is where the consumer should start reading from after reading in * the 'zpool create' portion of the log. diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index bff6c38cec7d..7edcf2b3295c 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -484,7 +484,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) +spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) { int wlocks_held = 0; @@ -517,7 +517,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_exit(spa_t *spa, int locks, void *tag) +spa_config_exit(spa_t *spa, int locks, const void *tag) { for (int i = SCL_LOCKS - 1; i >= 0; i--) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; @@ -658,6 +658,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; spa->spa_trust_config = B_TRUE; + spa->spa_hostid = zone_get_hostid(NULL); spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); @@ -2540,22 +2541,10 @@ spa_multihost(spa_t *spa) return (spa->spa_multihost ? 
B_TRUE : B_FALSE); } -unsigned long -spa_get_hostid(void) +uint32_t +spa_get_hostid(spa_t *spa) { - unsigned long myhostid; - -#ifdef _KERNEL - myhostid = zone_get_hostid(NULL); -#else /* _KERNEL */ - /* - * We're emulating the system's hostid in userland, so - * we can't use zone_get_hostid(). - */ - (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); -#endif /* _KERNEL */ - - return (myhostid); + return (spa->spa_hostid); } boolean_t diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 0fcd569e3b44..418315be86d1 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -644,8 +644,8 @@ txg_quiesce_thread(void *arg) /* * Delay this thread by delay nanoseconds if we are still in the open - * transaction group and there is already a waiting txg quiesing or quiesced. - * Abort the delay if this txg stalls or enters the quiesing state. + * transaction group and there is already a waiting txg quiescing or quiesced. + * Abort the delay if this txg stalls or enters the quiescing state. */ void txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) @@ -675,8 +675,8 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) mutex_exit(&tx->tx_sync_lock); } -void -txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +static boolean_t +txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig) { tx_state_t *tx = &dp->dp_tx; @@ -695,9 +695,39 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg) "tx_synced=%llu waiting=%llu dp=%p\n", tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); cv_broadcast(&tx->tx_sync_more_cv); - cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + if (wait_sig) { + /* + * Condition wait here but stop if the thread receives a + * signal. The caller may call txg_wait_synced*() again + * to resume waiting for this txg. + */ + if (cv_wait_io_sig(&tx->tx_sync_done_cv, + &tx->tx_sync_lock) == 0) { + mutex_exit(&tx->tx_sync_lock); + return (B_TRUE); + } + } else { + cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + } } mutex_exit(&tx->tx_sync_lock); + return (B_FALSE); +} + +void +txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +{ + VERIFY0(txg_wait_synced_impl(dp, txg, B_FALSE)); +} + +/* + * Similar to a txg_wait_synced but it can be interrupted from a signal. + * Returns B_TRUE if the thread was signaled while waiting. + */ +boolean_t +txg_wait_synced_sig(dsl_pool_t *dp, uint64_t txg) +{ + return (txg_wait_synced_impl(dp, txg, B_TRUE)); } /* @@ -738,7 +768,7 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce) /* * If there isn't a txg syncing or in the pipeline, push another txg through - * the pipeline by queiscing the open txg. + * the pipeline by quiescing the open txg. */ void txg_kick(dsl_pool_t *dp) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 197f57b28578..84551f92a160 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -27,6 +27,7 @@ * Copyright 2016 Toomas Soome * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. */ #include @@ -224,7 +225,7 @@ vdev_default_xlate(vdev_t *vd, const range_seg_t *in, range_seg_t *res) } /* - * Derive the enumerated alloction bias from string input. + * Derive the enumerated allocation bias from string input. * String origin is either the per-vdev zap or zpool(1M). 
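txg_wait_synced_impl() above folds both waits into one loop and lets the signalable variant return early so the caller can resume waiting later. A pthreads approximation -- note that pthread_cond_wait() does not itself return on signals, so this sketch assumes a signal handler sets the flag and broadcasts the condvar:

#include <pthread.h>
#include <signal.h>
#include <stdbool.h>

static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t tx_done = PTHREAD_COND_INITIALIZER;
static unsigned long synced_txg;

/* Returns true if interrupted before the txg synced; caller may retry. */
static bool
wait_synced_impl(unsigned long txg, bool wait_sig,
    volatile sig_atomic_t *interrupted)
{
	(void) pthread_mutex_lock(&tx_lock);
	while (synced_txg < txg) {
		if (wait_sig && *interrupted) {
			(void) pthread_mutex_unlock(&tx_lock);
			return (true);
		}
		/* a handler must set *interrupted, then broadcast tx_done */
		(void) pthread_cond_wait(&tx_done, &tx_lock);
	}
	(void) pthread_mutex_unlock(&tx_lock);
	return (false);
}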
*/ static vdev_alloc_bias_t @@ -808,7 +809,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_resilver_txg); if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); + vdev_defer_resilver(vd); /* * In general, when importing a pool we want to ignore the @@ -1300,7 +1301,7 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) #ifndef _KERNEL /* - * To accomodate zdb_leak_init() fake indirect + * To accommodate zdb_leak_init() fake indirect * metaslabs, we allocate a metaslab group for * indirect vdevs which normally don't have one. */ @@ -1832,13 +1833,10 @@ vdev_open(vdev_t *vd) /* * Track the min and max ashift values for normal data devices. - * - * DJB - TBD these should perhaps be tracked per allocation class - * (e.g. spa_min_ashift is used to round up post compression buffers) */ if (vd->vdev_top == vd && vd->vdev_ashift != 0 && vd->vdev_alloc_bias == VDEV_BIAS_NONE && - vd->vdev_aux == NULL) { + vd->vdev_islog == 0 && vd->vdev_aux == NULL) { if (vd->vdev_ashift > spa->spa_max_ashift) spa->spa_max_ashift = vd->vdev_ashift; if (vd->vdev_ashift < spa->spa_min_ashift) @@ -1846,18 +1844,12 @@ vdev_open(vdev_t *vd) } /* - * If a leaf vdev has a DTL, and seems healthy, then kick off a - * resilver. But don't do this if we are doing a reopen for a scrub, - * since this would just restart the scrub we are already doing. + * If this is a leaf vdev, assess whether a resilver is needed. + * But don't do this if we are doing a reopen for a scrub, since + * this would just restart the scrub we are already doing. */ - if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && - vdev_resilver_needed(vd, NULL, NULL)) { - if (dsl_scan_resilvering(spa->spa_dsl_pool) && - spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); - else - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen) + dsl_scan_assess_vdev(spa->spa_dsl_pool, vd); return (0); } @@ -3223,6 +3215,20 @@ vdev_sync_done(vdev_t *vd, uint64_t txg) != NULL) metaslab_sync_done(msp, txg); + /* + * Because this function is only called on dirty vdevs, it's possible + * we won't consider all metaslabs for unloading on every + * txg. However, unless the system is largely idle it is likely that + * we will dirty all vdevs within a few txgs. + */ + for (int i = 0; i < vd->vdev_ms_count; i++) { + msp = vd->vdev_ms[i]; + mutex_enter(&msp->ms_lock); + if (msp->ms_sm != NULL) + metaslab_potentially_unload(msp, txg); + mutex_exit(&msp->ms_lock); + } + if (reassess) metaslab_sync_reassess(vd->vdev_mg); } @@ -3670,14 +3676,11 @@ vdev_clear(spa_t *spa, vdev_t *vd) if (vd != rvd && vdev_writeable(vd->vdev_top)) vdev_state_dirty(vd->vdev_top); - if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) { - if (dsl_scan_resilvering(spa->spa_dsl_pool) && - spa_feature_is_enabled(spa, - SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); - else - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + /* If a resilver isn't required, check if vdevs can be culled */ + if (vd->vdev_aux == NULL && !vdev_is_dead(vd) && + !dsl_scan_resilvering(spa->spa_dsl_pool) && + !dsl_scan_resilver_scheduled(spa->spa_dsl_pool)) + spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR); } @@ -4154,7 +4157,7 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion * factor. 
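The tightened condition above now excludes log devices as well as aux devices from the pool-wide ashift bounds. The bookkeeping, reduced to a sketch with a made-up device struct:

#include <stdbool.h>
#include <stdint.h>

struct devinfo {
	bool top;		/* top-level vdev */
	bool log;		/* SLOG device */
	bool aux;		/* spare or l2cache */
	uint64_t ashift;
};

/*
 * Only top-level, normal-class, non-log, non-aux devices update the
 * pool-wide min/max ashift.
 */
static void
track_ashift(const struct devinfo *d, uint64_t *min_ashift,
    uint64_t *max_ashift)
{
	if (!d->top || d->log || d->aux || d->ashift == 0)
		return;
	if (d->ashift > *max_ashift)
		*max_ashift = d->ashift;
	if (d->ashift < *min_ashift)
		*min_ashift = d->ashift;
}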
We must calculate this here and not at the root vdev * because the root vdev's psize-to-asize is simply the max of its - * childrens', thus not accurate enough for us. + * children's, thus not accurate enough for us. */ dspace_delta = vdev_deflated_space(vd, space_delta); @@ -4670,18 +4673,46 @@ vdev_deadman(vdev_t *vd, char *tag) } void -vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd) +vdev_defer_resilver(vdev_t *vd) { - for (uint64_t i = 0; i < vd->vdev_children; i++) - vdev_set_deferred_resilver(spa, vd->vdev_child[i]); + ASSERT(vd->vdev_ops->vdev_op_leaf); - if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) || - range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { - return; + vd->vdev_resilver_deferred = B_TRUE; + vd->vdev_spa->spa_resilver_deferred = B_TRUE; +} + +/* + * Clears the resilver deferred flag on all leaf devs under vd. Returns + * B_TRUE if we have devices that need to be resilvered and are available to + * accept resilver I/Os. + */ +boolean_t +vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx) +{ + boolean_t resilver_needed = B_FALSE; + spa_t *spa = vd->vdev_spa; + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + resilver_needed |= vdev_clear_resilver_deferred(cvd, tx); } - vd->vdev_resilver_deferred = B_TRUE; - spa->spa_resilver_deferred = B_TRUE; + if (vd == spa->spa_root_vdev && + spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) { + spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + vdev_config_dirty(vd); + spa->spa_resilver_deferred = B_FALSE; + return (resilver_needed); + } + + if (!vdev_is_concrete(vd) || vd->vdev_aux || + !vd->vdev_ops->vdev_op_leaf) + return (resilver_needed); + + vd->vdev_resilver_deferred = B_FALSE; + + return (!vdev_is_dead(vd) && !vd->vdev_offline && + vdev_resilver_needed(vd, NULL, NULL)); } /* diff --git a/module/zfs/vdev_cache.c b/module/zfs/vdev_cache.c index 0f1d9448b590..b63b9f9795f9 100644 --- a/module/zfs/vdev_cache.c +++ b/module/zfs/vdev_cache.c @@ -46,7 +46,7 @@ * terribly wasteful of bandwidth. A more intelligent version of the cache * could keep track of access patterns and not do read-ahead unless it sees * at least two temporally close I/Os to the same region. Currently, only - * metadata I/O is inflated. A futher enhancement could take advantage of + * metadata I/O is inflated. A further enhancement could take advantage of * more semantic information about the I/O. And it could use something * faster than an AVL tree; that was chosen solely for convenience. * diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 1419ae6ad54a..661f0f1b727c 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -38,10 +38,21 @@ #include #include -char *zfs_vdev_scheduler = VDEV_SCHEDULER; +/* + * Unique identifier for the exclusive vdev holder. + */ static void *zfs_vdev_holder = VDEV_HOLDER; -/* size of the "reserved" partition, in blocks */ +/* + * Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the + * device is missing. The missing path may be transient since the links + * can be briefly removed and recreated in response to udev events. + */ +static unsigned zfs_vdev_open_timeout_ms = 1000; + +/* + * Size of the "reserved" partition, in blocks. + */ #define EFI_MIN_RESV_SIZE (16 * 1024) /* @@ -160,83 +171,13 @@ vdev_disk_error(zio_t *zio) zio->io_flags); } -/* - * Use the Linux 'noop' elevator for zfs managed block devices. This - * strikes the ideal balance by allowing the zfs elevator to do all - * request ordering and prioritization. 
While allowing the Linux - * elevator to do the maximum front/back merging allowed by the - * physical device. This yields the largest possible requests for - * the device with the lowest total overhead. - */ -static void -vdev_elevator_switch(vdev_t *v, char *elevator) -{ - vdev_disk_t *vd = v->vdev_tsd; - struct request_queue *q; - char *device; - int error; - - for (int c = 0; c < v->vdev_children; c++) - vdev_elevator_switch(v->vdev_child[c], elevator); - - if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL) - return; - - q = bdev_get_queue(vd->vd_bdev); - device = vd->vd_bdev->bd_disk->disk_name; - - /* - * Skip devices which are not whole disks (partitions). - * Device-mapper devices are excepted since they may be whole - * disks despite the vdev_wholedisk flag, in which case we can - * and should switch the elevator. If the device-mapper device - * does not have an elevator (i.e. dm-raid, dm-crypt, etc.) the - * "Skip devices without schedulers" check below will fail. - */ - if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0) - return; - - /* Leave existing scheduler when set to "none" */ - if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4)) - return; - - /* - * The elevator_change() function was available in kernels from - * 2.6.36 to 4.11. When not available fall back to using the user - * mode helper functionality to set the elevator via sysfs. This - * requires /bin/echo and sysfs to be mounted which may not be true - * early in the boot process. - */ -#ifdef HAVE_ELEVATOR_CHANGE - error = elevator_change(q, elevator); -#else -#define SET_SCHEDULER_CMD \ - "exec 0/sys/block/%s/queue/scheduler " \ - " 2>/dev/null; " \ - "echo %s" - - char *argv[] = { "/bin/sh", "-c", NULL, NULL }; - char *envp[] = { NULL }; - - argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); - error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - strfree(argv[2]); -#endif /* HAVE_ELEVATOR_CHANGE */ - if (error) { - zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d", - elevator, v->vdev_path, device, error); - } -} - static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { struct block_device *bdev; fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa)); - int count = 0, block_size; - int bdev_retry_count = 50; + hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms); vdev_disk_t *vd; /* Must have a pathname and it must be absolute. */ @@ -251,7 +192,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * partition force re-scanning the partition table while closed * in order to get an accurate updated block device size. Then * since udev may need to recreate the device links increase the - * open retry count before reporting the device as unavailable. + * open retry timeout before reporting the device as unavailable. */ vd = v->vdev_tsd; if (vd) { @@ -276,8 +217,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (!IS_ERR(bdev)) { int error = vdev_bdev_reread_part(bdev); vdev_bdev_close(bdev, mode); - if (error == 0) - bdev_retry_count = 100; + if (error == 0) { + timeout = MSEC2NSEC( + zfs_vdev_open_timeout_ms * 2); + } } } } else { @@ -310,12 +253,12 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * and it is reasonable to sleep and retry before giving up. In * practice delays have been observed to be on the order of 100ms. 
*/ + hrtime_t start = gethrtime(); bdev = ERR_PTR(-ENXIO); - while (IS_ERR(bdev) && count < bdev_retry_count) { + while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) { bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder); if (unlikely(PTR_ERR(bdev) == -ENOENT)) { schedule_timeout(MSEC_TO_TICK(10)); - count++; } else if (IS_ERR(bdev)) { break; } @@ -323,7 +266,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (IS_ERR(bdev)) { int error = -PTR_ERR(bdev); - vdev_dbgmsg(v, "open error=%d count=%d", error, count); + vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error, + (u_longlong_t)(gethrtime() - start), + (u_longlong_t)timeout); vd->vd_bdev = NULL; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); @@ -337,7 +282,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, struct request_queue *q = bdev_get_queue(vd->vd_bdev); /* Determine the physical block size */ - block_size = vdev_bdev_block_size(vd->vd_bdev); + int block_size = vdev_bdev_block_size(vd->vd_bdev); /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE; @@ -360,9 +305,6 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, /* Based on the minimum sector size set the block size */ *ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; - /* Try to set the io scheduler elevator algorithm */ - (void) vdev_elevator_switch(v, zfs_vdev_scheduler); - return (0); } @@ -545,7 +487,7 @@ vdev_bio_associate_blkg(struct bio *bio) ASSERT3P(q, !=, NULL); ASSERT3P(bio->bi_blkg, ==, NULL); - if (blkg_tryget(q->root_blkg)) + if (q->root_blkg && blkg_tryget(q->root_blkg)) bio->bi_blkg = q->root_blkg; } #define bio_associate_blkg vdev_bio_associate_blkg @@ -903,53 +845,41 @@ vdev_disk_rele(vdev_t *vd) /* XXX: Implement me as a vnode rele for the device */ } +vdev_ops_t vdev_disk_ops = { + .vdev_op_open = vdev_disk_open, + .vdev_op_close = vdev_disk_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_disk_io_start, + .vdev_op_io_done = vdev_disk_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_disk_hold, + .vdev_op_rele = vdev_disk_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ +}; + +/* + * The zfs_vdev_scheduler module option has been deprecated. Setting this + * value no longer has any effect. It has not yet been entirely removed + * to allow the module to be loaded if this option is specified in the + * /etc/modprobe.d/zfs.conf file. The following warning will be logged. 
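The open path above replaces a fixed retry count with a wall-clock deadline: transient ENOENT (udev briefly removing and recreating links) is retried until zfs_vdev_open_timeout_ms elapses, while any other error gives up at once. A userland approximation using CLOCK_MONOTONIC, with an illustrative try_open callback:

#include <errno.h>
#include <stdint.h>
#include <time.h>

static int64_t
now_ns(void)
{
	struct timespec ts;

	(void) clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
}

/* try_open() returns 0 on success or an errno value on failure. */
static int
open_with_deadline(const char *path, int64_t timeout_ns,
    int (*try_open)(const char *))
{
	int64_t start = now_ns();
	int error;

	do {
		error = try_open(path);
		if (error != ENOENT)
			break;	/* success, or a non-transient failure */
		/* brief sleep, mirroring the kernel's 10ms backoff */
		struct timespec ts = { 0, 10 * 1000000L };
		(void) nanosleep(&ts, NULL);
	} while (now_ns() - start < timeout_ns);

	return (error);
}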
+ */ static int param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) { - spa_t *spa = NULL; - char *p; - - if (val == NULL) - return (SET_ERROR(-EINVAL)); - - if ((p = strchr(val, '\n')) != NULL) - *p = '\0'; - - if (spa_mode_global != 0) { - mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) { - if (spa_state(spa) != POOL_STATE_ACTIVE || - !spa_writeable(spa) || spa_suspended(spa)) - continue; - - spa_open_ref(spa, FTAG); - mutex_exit(&spa_namespace_lock); - vdev_elevator_switch(spa->spa_root_vdev, (char *)val); - mutex_enter(&spa_namespace_lock); - spa_close(spa, FTAG); - } - mutex_exit(&spa_namespace_lock); + int error = param_set_charp(val, kp); + if (error == 0) { + printk(KERN_INFO "The 'zfs_vdev_scheduler' module option " + "is not supported.\n"); } - return (param_set_charp(val, kp)); + return (error); } -vdev_ops_t vdev_disk_ops = { - vdev_disk_open, - vdev_disk_close, - vdev_default_asize, - vdev_disk_io_start, - vdev_disk_io_done, - NULL, - NULL, - vdev_disk_hold, - vdev_disk_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_DISK, /* name of this vdev type */ - B_TRUE /* leaf vdev */ -}; - +char *zfs_vdev_scheduler = "unused"; module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler, param_get_charp, &zfs_vdev_scheduler, 0644); MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler"); diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index c155057852a3..b79017f3a610 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -277,19 +277,19 @@ vdev_file_io_done(zio_t *zio) } vdev_ops_t vdev_file_ops = { - vdev_file_open, - vdev_file_close, - vdev_default_asize, - vdev_file_io_start, - vdev_file_io_done, - NULL, - NULL, - vdev_file_hold, - vdev_file_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_FILE, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_file_open, + .vdev_op_close = vdev_file_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_file_io_start, + .vdev_op_io_done = vdev_file_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_file_hold, + .vdev_op_rele = vdev_file_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_FILE, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; void @@ -313,19 +313,19 @@ vdev_file_fini(void) #ifndef _KERNEL vdev_ops_t vdev_disk_ops = { - vdev_file_open, - vdev_file_close, - vdev_default_asize, - vdev_file_io_start, - vdev_file_io_done, - NULL, - NULL, - vdev_file_hold, - vdev_file_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_DISK, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_file_open, + .vdev_op_close = vdev_file_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_file_io_start, + .vdev_op_io_done = vdev_file_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_file_hold, + .vdev_op_rele = vdev_file_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; #endif diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 68dfe83128a7..4539fa638ada 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -15,6 +15,7 @@ /* * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. 
*/ #include @@ -902,7 +903,7 @@ vdev_obsolete_sm_object(vdev_t *vd, uint64_t *sm_obj) } int error = zap_lookup(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap, - VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, sizeof (sm_obj), 1, sm_obj); + VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, sizeof (uint64_t), 1, sm_obj); if (error == ENOENT) { *sm_obj = 0; error = 0; @@ -1841,19 +1842,19 @@ vdev_indirect_io_done(zio_t *zio) } vdev_ops_t vdev_indirect_ops = { - vdev_indirect_open, - vdev_indirect_close, - vdev_default_asize, - vdev_indirect_io_start, - vdev_indirect_io_done, - NULL, - NULL, - NULL, - NULL, - vdev_indirect_remap, - NULL, - VDEV_TYPE_INDIRECT, /* name of this vdev type */ - B_FALSE /* leaf vdev */ + .vdev_op_open = vdev_indirect_open, + .vdev_op_close = vdev_indirect_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_indirect_io_start, + .vdev_op_io_done = vdev_indirect_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = vdev_indirect_remap, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_INDIRECT, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* leaf vdev */ }; #if defined(_KERNEL) diff --git a/module/zfs/vdev_indirect_births.c b/module/zfs/vdev_indirect_births.c index 1c44a64287d3..99b83c392257 100644 --- a/module/zfs/vdev_indirect_births.c +++ b/module/zfs/vdev_indirect_births.c @@ -70,7 +70,7 @@ vdev_indirect_births_close(vdev_indirect_births_t *vib) if (vib->vib_phys->vib_count > 0) { uint64_t births_size = vdev_indirect_births_size_impl(vib); - kmem_free(vib->vib_entries, births_size); + vmem_free(vib->vib_entries, births_size); vib->vib_entries = NULL; } @@ -108,7 +108,7 @@ vdev_indirect_births_open(objset_t *os, uint64_t births_object) if (vib->vib_phys->vib_count > 0) { uint64_t births_size = vdev_indirect_births_size_impl(vib); - vib->vib_entries = kmem_alloc(births_size, KM_SLEEP); + vib->vib_entries = vmem_alloc(births_size, KM_SLEEP); VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0, births_size, vib->vib_entries, DMU_READ_PREFETCH)); } @@ -148,10 +148,10 @@ vdev_indirect_births_add_entry(vdev_indirect_births_t *vib, vib->vib_phys->vib_count++; new_size = vdev_indirect_births_size_impl(vib); - new_entries = kmem_alloc(new_size, KM_SLEEP); + new_entries = vmem_alloc(new_size, KM_SLEEP); if (old_size > 0) { bcopy(vib->vib_entries, new_entries, old_size); - kmem_free(vib->vib_entries, old_size); + vmem_free(vib->vib_entries, old_size); } new_entries[vib->vib_phys->vib_count - 1] = vibe; vib->vib_entries = new_entries; diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c index b1590132636b..9958a2958322 100644 --- a/module/zfs/vdev_initialize.c +++ b/module/zfs/vdev_initialize.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * Value that is written to disk during initialization. @@ -415,7 +416,7 @@ vdev_initialize_load(vdev_t *vd) * Convert the logical range into a physical range and add it to our * avl tree. */ -void +static void vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size) { vdev_t *vd = arg; @@ -599,7 +600,7 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list) } /* - * Stop initializing a device, with the resultant initialing state being + * Stop initializing a device, with the resultant initializing state being * tgt_state. For blocking behavior pass NULL for vd_list. Otherwise, when * a list_t is provided the stopping vdev is inserted in to the list. 
Callers * are then required to call vdev_initialize_stop_wait() to block for all the diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index ff1526082376..4919ac71ce5f 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -21,8 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -603,7 +602,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * zfs_remove_max_segment, so we need at least one entry * per zfs_remove_max_segment of allocated data. */ - seg_count += to_alloc / zfs_remove_max_segment; + seg_count += to_alloc / spa_remove_max_segment(spa); fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE, seg_count * diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 59cc2dcdd2ca..cf8402dcc80a 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -282,10 +282,11 @@ vdev_mirror_map_init(zio_t *zio) } /* - * If we do not trust the pool config, some DVAs might be - * invalid or point to vdevs that do not exist. We skip them. + * If the pool cannot be written to, then infer that some + * DVAs might be invalid or point to vdevs that do not exist. + * We skip them. */ - if (!spa_trust_config(spa)) { + if (!spa_writeable(spa)) { ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); int j = 0; for (int i = 0; i < c; i++) { @@ -309,6 +310,13 @@ vdev_mirror_map_init(zio_t *zio) mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); mc->mc_offset = DVA_GET_OFFSET(&dva[c]); + if (mc->mc_vd == NULL) { + kmem_free(mm, vdev_mirror_map_size( + mm->mm_children)); + zio->io_vsd = NULL; + zio->io_error = ENXIO; + return (NULL); + } } } else { /* @@ -485,7 +493,7 @@ vdev_mirror_preferred_child_randomize(zio_t *zio) /* * Try to find a vdev whose DTL doesn't contain the block we want to read - * prefering vdevs based on determined load. + * preferring vdevs based on determined load. * * Try to find a child whose DTL doesn't contain the block we want to read. * If we can't, try the read on any vdev we haven't already tried. 
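The positional vdev_ops_t initializers being rewritten throughout this patch (vdev_file and vdev_indirect above, the mirror, replacing, and spare tables in the hunk that follows) all move to C99 designated initializers. A standalone illustration of the motivation, using made-up types rather than the real vdev_ops_t: with positional initialization, inserting a new member into the ops struct silently shifts the meaning of every later entry, while designated initializers bind each value to a named field and zero any field left unnamed.

typedef struct demo_ops {
	int (*op_open)(void);
	int (*op_close)(void);
	const char *op_type;
} demo_ops_t;

static int demo_open(void) { return (0); }
static int demo_close(void) { return (0); }

static const demo_ops_t demo_table = {
	.op_open = demo_open,
	.op_close = demo_close,
	.op_type = "demo",	/* any omitted field would be zero/NULL */
};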
@@ -786,51 +794,51 @@ vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) } vdev_ops_t vdev_mirror_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_MIRROR, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_MIRROR, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_replacing_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_REPLACING, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_REPLACING, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_spare_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_SPARE, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_SPARE, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; #if defined(_KERNEL) diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c index d85993bff052..205b23eba7f5 100644 --- a/module/zfs/vdev_missing.c +++ b/module/zfs/vdev_missing.c @@ -80,33 +80,33 @@ vdev_missing_io_done(zio_t *zio) } vdev_ops_t vdev_missing_ops = { - vdev_missing_open, - vdev_missing_close, - vdev_default_asize, - vdev_missing_io_start, - vdev_missing_io_done, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_MISSING, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_missing_open, + .vdev_op_close = vdev_missing_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_missing_io_start, + .vdev_op_io_done = vdev_missing_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_MISSING, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; vdev_ops_t 
vdev_hole_ops = { - vdev_missing_open, - vdev_missing_close, - vdev_default_asize, - vdev_missing_io_start, - vdev_missing_io_done, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_HOLE, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_missing_open, + .vdev_op_close = vdev_missing_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_missing_io_start, + .vdev_op_io_done = vdev_missing_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_HOLE, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index e74df76b7530..d3d9a6baa4a3 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -709,6 +709,18 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) do { dio = nio; nio = AVL_NEXT(t, dio); + zio_add_child(dio, aio); + vdev_queue_io_remove(vq, dio); + } while (dio != last); + + /* + * We need to drop the vdev queue's lock during zio_execute() to + * avoid a deadlock that we could encounter due to lock order + * reversal between vq_lock and io_lock in zio_change_priority(). + * Use the dropped lock to do memory copy without congestion. + */ + mutex_exit(&vq->vq_lock); + while ((dio = zio_walk_parents(aio, &zl)) != NULL) { ASSERT3U(dio->io_type, ==, aio->io_type); if (dio->io_flags & ZIO_FLAG_NODATA) { @@ -720,16 +732,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) dio->io_offset - aio->io_offset, 0, dio->io_size); } - zio_add_child(dio, aio); - vdev_queue_io_remove(vq, dio); - } while (dio != last); - - /* - * We need to drop the vdev queue's lock to avoid a deadlock that we - * could encounter since this I/O will complete immediately. - */ - mutex_exit(&vq->vq_lock); - while ((dio = zio_walk_parents(aio, &zl)) != NULL) { zio_vdev_io_bypass(dio); zio_execute(dio); } @@ -891,7 +893,7 @@ vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority) * ZIO_PRIORITY_NOW is used by the vdev cache code and the aggregate zio * code to issue IOs without adding them to the vdev queue. In this * case, the zio is already going to be issued as quickly as possible - * and so it doesn't need any reprioitization to help. + * and so it doesn't need any reprioritization to help. */ if (zio->io_priority == ZIO_PRIORITY_NOW) return; diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 7b428dfc4b10..75ce22bf49a1 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -103,7 +103,7 @@ * R = 4^n-1 * D_0 + 4^n-2 * D_1 + ... + 4^1 * D_n-2 + 4^0 * D_n-1 * = ((...((D_0) * 4 + D_1) * 4 + ...) * 4 + D_n-2) * 4 + D_n-1 * - * We chose 1, 2, and 4 as our generators because 1 corresponds to the trival + * We chose 1, 2, and 4 as our generators because 1 corresponds to the trivial * XOR operation, and 2 and 4 can be computed quickly and generate linearly- * independent coefficients. (There are no additional coefficients that have * this property which is why the uncorrected Plank method breaks down.) @@ -551,7 +551,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, /* * If all data stored spans all columns, there's a danger that parity * will always be on the same device and, since parity isn't read - * during normal operation, that that device's I/O bandwidth won't be + * during normal operation, that device's I/O bandwidth won't be * used effectively. 
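On the vdev_queue.c hunk above: the aggregation path now finishes all of the bookkeeping that needs vq_lock (zio_add_child() and vdev_queue_io_remove()) before dropping it, because zio_execute() can reach zio_change_priority(), which, per the comment in the hunk, takes the same two locks in the opposite order. A generic pthreads rendition of the rule (finish lock A's work, release A, only then call into code that takes lock B); the names are illustrative, not kernel code.

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;	/* vq_lock stand-in */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;	/* io_lock stand-in */

static void
callback_taking_b(void)
{
	pthread_mutex_lock(&lock_b);
	/* reprioritization work */
	pthread_mutex_unlock(&lock_b);
}

static void
safe_path(void)
{
	pthread_mutex_lock(&lock_a);
	/* queue bookkeeping protected by A */
	pthread_mutex_unlock(&lock_a);

	/* A is no longer held, so no thread waits on B while holding A. */
	callback_taking_b();
}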
We therefore switch the parity every 1MB. * * ... at least that was, ostensibly, the theory. As a practical @@ -2112,7 +2112,7 @@ vdev_raidz_io_start(zio_t *zio) if (vdrz->vd_logical_width != vdrz->vd_physical_width) { /* XXX rangelock not needed after expansion completes */ locked_range_t *lr = - rangelock_enter(&vdrz->vn_vre.vre_rangelock, + zfs_rangelock_enter(&vdrz->vn_vre.vre_rangelock, zio->io_offset, zio->io_size, RL_READER); rm = vdev_raidz_map_alloc_expanded(zio->io_abd, @@ -2911,7 +2911,7 @@ vdev_raidz_io_done(zio_t *zio) } } if (rm->rm_lr != NULL) { - rangelock_exit(rm->rm_lr); + zfs_rangelock_exit(rm->rm_lr); rm->rm_lr = NULL; } } @@ -2932,7 +2932,7 @@ vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded) /* * Determine if any portion of the provided block resides on a child vdev * with a dirty DTL and therefore needs to be resilvered. The function - * assumes that at least one DTL is dirty which imples that full stripe + * assumes that at least one DTL is dirty which implies that full stripe * width blocks must be resilvered. */ static boolean_t @@ -3011,7 +3011,7 @@ raidz_reflow_sync(void *arg, dmu_tx_t *tx) * Ensure there are no i/os to the range that is being committed. * XXX This might be overkill? */ - locked_range_t *lr = rangelock_enter(&vre->vre_rangelock, + locked_range_t *lr = zfs_rangelock_enter(&vre->vre_rangelock, vre->vre_offset_phys, vre->vre_offset_pertxg[txgoff] - vre->vre_offset_phys, RL_WRITER); @@ -3021,7 +3021,7 @@ raidz_reflow_sync(void *arg, dmu_tx_t *tx) */ vre->vre_offset_phys = vre->vre_offset_pertxg[txgoff]; vre->vre_offset_pertxg[txgoff] = 0; - rangelock_exit(lr); + zfs_rangelock_exit(lr); /* * vre_offset_phys will be added to the on-disk config by @@ -3096,7 +3096,7 @@ raidz_reflow_write_done(zio_t *zio) cv_signal(&vre->vre_cv); mutex_exit(&vre->vre_lock); - rangelock_exit(rra->rra_lr); + zfs_rangelock_exit(rra->rra_lr); kmem_free(rra, sizeof (*rra)); spa_config_exit(zio->io_spa, SCL_STATE, zio->io_spa); @@ -3169,7 +3169,7 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra), KM_SLEEP); rra->rra_vre = vre; - rra->rra_lr = rangelock_enter(&vre->vre_rangelock, + rra->rra_lr = zfs_rangelock_enter(&vre->vre_rangelock, offset, length, RL_WRITER); mutex_enter(&vre->vre_lock); @@ -3482,7 +3482,7 @@ vdev_raidz_get_tsd(spa_t *spa, nvlist_t *nv) vdrz->vn_vre.vre_offset_phys = UINT64_MAX; mutex_init(&vdrz->vn_vre.vre_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&vdrz->vn_vre.vre_cv, NULL, CV_DEFAULT, NULL); - rangelock_init(&vdrz->vn_vre.vre_rangelock, NULL, NULL); + zfs_rangelock_init(&vdrz->vn_vre.vre_rangelock, NULL, NULL); uint_t children; nvlist_t **child; @@ -3640,17 +3640,17 @@ spa_raidz_expand_get_stats(spa_t *spa, pool_raidz_expand_stat_t *pres) vdev_ops_t vdev_raidz_ops = { - vdev_raidz_open, - vdev_raidz_close, - vdev_raidz_asize, - vdev_raidz_io_start, - vdev_raidz_io_done, - vdev_raidz_state_change, - vdev_raidz_need_resilver, - NULL, - NULL, - NULL, - vdev_raidz_xlate, - VDEV_TYPE_RAIDZ, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_raidz_open, + .vdev_op_close = vdev_raidz_close, + .vdev_op_asize = vdev_raidz_asize, + .vdev_op_io_start = vdev_raidz_io_start, + .vdev_op_io_done = vdev_raidz_io_done, + .vdev_op_state_change = vdev_raidz_state_change, + .vdev_op_need_resilver = vdev_raidz_need_resilver, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_raidz_xlate, + .vdev_op_type = 
VDEV_TYPE_RAIDZ, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index e6112bc02137..576d33befaa9 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -27,9 +27,9 @@ #include #include #include - #include #include +#include extern boolean_t raidz_will_scalar_work(void); @@ -87,6 +87,7 @@ static uint32_t user_sel_impl = IMPL_FASTEST; static size_t raidz_supp_impl_cnt = 0; static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)]; +#if defined(_KERNEL) /* * kstats values for supported implementations * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s] @@ -95,14 +96,19 @@ static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1]; /* kstat for benchmarked implementations */ static kstat_t *raidz_math_kstat = NULL; +#endif /* - * Selects the raidz operation for raidz_map - * If rm_ops is set to NULL original raidz implementation will be used + * Returns the RAIDZ operations for raidz_map() parity calculations. When + * a SIMD implementation is not allowed in the current context, then fallback + * to the fastest generic implementation. */ -raidz_impl_ops_t * -vdev_raidz_math_get_ops() +const raidz_impl_ops_t * +vdev_raidz_math_get_ops(void) { + if (!kfpu_allowed()) + return (&vdev_raidz_scalar_impl); + raidz_impl_ops_t *ops = NULL; const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl); @@ -111,18 +117,14 @@ vdev_raidz_math_get_ops() ASSERT(raidz_math_initialized); ops = &vdev_raidz_fastest_impl; break; -#if !defined(_KERNEL) case IMPL_CYCLE: - { + /* Cycle through all supported implementations */ ASSERT(raidz_math_initialized); ASSERT3U(raidz_supp_impl_cnt, >, 0); - /* Cycle through all supported implementations */ static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt; ops = raidz_supp_impl[idx]; - } - break; -#endif + break; case IMPL_ORIGINAL: ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl; break; @@ -273,6 +275,8 @@ const char *raidz_rec_name[] = { "rec_pq", "rec_pr", "rec_qr", "rec_pqr" }; +#if defined(_KERNEL) + #define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1) static int @@ -435,21 +439,21 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn) } } } +#endif -void -vdev_raidz_math_init(void) +/* + * Initialize and benchmark all supported implementations. 
+ */ +static void +benchmark_raidz(void) { raidz_impl_ops_t *curr_impl; - zio_t *bench_zio = NULL; - raidz_map_t *bench_rm = NULL; - uint64_t bench_parity; - int i, c, fn; + int i, c; - /* move supported impl into raidz_supp_impl */ + /* Move supported impl into raidz_supp_impl */ for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i]; - /* initialize impl */ if (curr_impl->init) curr_impl->init(); @@ -459,20 +463,12 @@ vdev_raidz_math_init(void) membar_producer(); /* complete raidz_supp_impl[] init */ raidz_supp_impl_cnt = c; /* number of supported impl */ -#if !defined(_KERNEL) - /* Skip benchmarking and use last implementation as fastest */ - memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1], - sizeof (vdev_raidz_fastest_impl)); - strcpy(vdev_raidz_fastest_impl.name, "fastest"); - - raidz_math_initialized = B_TRUE; - - /* Use 'cycle' math selection method for userspace */ - VERIFY0(vdev_raidz_impl_set("cycle")); - return; -#endif +#if defined(_KERNEL) + zio_t *bench_zio = NULL; + raidz_map_t *bench_rm = NULL; + uint64_t bench_parity; - /* Fake an zio and run the benchmark on a warmed up buffer */ + /* Fake a zio and run the benchmark on a warmed up buffer */ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP); bench_zio->io_offset = 0; bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */ @@ -480,7 +476,7 @@ vdev_raidz_math_init(void) memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE); /* Benchmark parity generation methods */ - for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) { + for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) { bench_parity = fn + 1; /* New raidz_map is needed for each generate_p/q/r */ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT, @@ -495,7 +491,7 @@ vdev_raidz_math_init(void) bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT, BENCH_COLS, PARITY_PQR); - for (fn = 0; fn < RAIDZ_REC_NUM; fn++) + for (int fn = 0; fn < RAIDZ_REC_NUM; fn++) benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl); vdev_raidz_map_free(bench_rm); @@ -503,11 +499,29 @@ vdev_raidz_math_init(void) /* cleanup the bench zio */ abd_free(bench_zio->io_abd); kmem_free(bench_zio, sizeof (zio_t)); +#else + /* + * Skip the benchmark in user space to avoid impacting libzpool + * consumers (zdb, zhack, zinject, ztest). The last implementation + * is assumed to be the fastest and used by default. + */ + memcpy(&vdev_raidz_fastest_impl, + raidz_supp_impl[raidz_supp_impl_cnt - 1], + sizeof (vdev_raidz_fastest_impl)); + strcpy(vdev_raidz_fastest_impl.name, "fastest"); +#endif /* _KERNEL */ +} - /* install kstats for all impl */ +void +vdev_raidz_math_init(void) +{ + /* Determine the fastest available implementation. 
*/ + benchmark_raidz(); + +#if defined(_KERNEL) + /* Install kstats for all implementations */ raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - if (raidz_math_kstat != NULL) { raidz_math_kstat->ks_data = NULL; raidz_math_kstat->ks_ndata = UINT32_MAX; @@ -517,6 +531,7 @@ vdev_raidz_math_init(void) raidz_math_kstat_addr); kstat_install(raidz_math_kstat); } +#endif /* Finish initialization */ atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl); @@ -527,15 +542,15 @@ void vdev_raidz_math_fini(void) { raidz_impl_ops_t const *curr_impl; - int i; +#if defined(_KERNEL) if (raidz_math_kstat != NULL) { kstat_delete(raidz_math_kstat); raidz_math_kstat = NULL; } +#endif - /* fini impl */ - for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { + for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { curr_impl = raidz_all_maths[i]; if (curr_impl->fini) curr_impl->fini(); @@ -546,9 +561,7 @@ static const struct { char *name; uint32_t sel; } math_impl_opts[] = { -#if !defined(_KERNEL) { "cycle", IMPL_CYCLE }, -#endif { "fastest", IMPL_FASTEST }, { "original", IMPL_ORIGINAL }, { "scalar", IMPL_SCALAR } diff --git a/module/zfs/vdev_raidz_math_aarch64_neon.c b/module/zfs/vdev_raidz_math_aarch64_neon.c index e3ad06776503..0a67ceb84920 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon.c +++ b/module/zfs/vdev_raidz_math_aarch64_neon.c @@ -207,7 +207,7 @@ DEFINE_REC_METHODS(aarch64_neon); static boolean_t raidz_will_aarch64_neon_work(void) { - return (B_TRUE); // __arch64__ requires NEON + return (kfpu_allowed()); } const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = { diff --git a/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/module/zfs/vdev_raidz_math_aarch64_neon_common.h index 024917417a55..0ea2ad611c77 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon_common.h +++ b/module/zfs/vdev_raidz_math_aarch64_neon_common.h @@ -42,7 +42,7 @@ /* * Here we need registers not used otherwise. * They will be used in unused ASM for the case - * with more registers than required... but GGC + * with more registers than required... but GCC * will still need to make sure the constraints * are correct, and duplicate constraints are illegal * ... 
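Every raidz_will_*_work() predicate in these files now ANDs in kfpu_allowed(), and vdev_raidz_math_get_ops() above returns the scalar implementation outright when kernel FPU use is not permitted. A userland sketch of that selection scheme; the table, the predicates, and pick_impl() are illustrative stand-ins, not the module's API.

#include <stdbool.h>
#include <stddef.h>

typedef struct impl {
	const char *name;
	bool (*will_work)(void);
} impl_t;

/* Stand-ins for kfpu_allowed() and the CPU feature tests. */
static bool fpu_allowed(void) { return (true); }
static bool scalar_works(void) { return (true); }
static bool simd_works(void) { return (fpu_allowed() /* && feature bits */); }

static const impl_t impls[] = {
	{ "scalar", scalar_works },	/* always safe */
	{ "simd", simd_works },		/* gated on FPU availability */
};

static const impl_t *
pick_impl(void)
{
	const impl_t *best = &impls[0];

	for (size_t i = 0; i < sizeof (impls) / sizeof (impls[0]); i++) {
		if (impls[i].will_work())
			best = &impls[i];
	}
	return (best);
}

int
main(void)
{
	return (pick_impl() == NULL);
}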
and we use the "register" number as a name diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c index f8688a06a8f6..e072f51cd635 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c +++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c @@ -217,7 +217,7 @@ DEFINE_REC_METHODS(aarch64_neonx2); static boolean_t raidz_will_aarch64_neonx2_work(void) { - return (B_TRUE); // __arch64__ requires NEON + return (kfpu_allowed()); } const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = { diff --git a/module/zfs/vdev_raidz_math_avx2.c b/module/zfs/vdev_raidz_math_avx2.c index 063d29bcd8bf..a12eb672081f 100644 --- a/module/zfs/vdev_raidz_math_avx2.c +++ b/module/zfs/vdev_raidz_math_avx2.c @@ -396,7 +396,7 @@ DEFINE_REC_METHODS(avx2); static boolean_t raidz_will_avx2_work(void) { - return (zfs_avx_available() && zfs_avx2_available()); + return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available()); } const raidz_impl_ops_t vdev_raidz_avx2_impl = { diff --git a/module/zfs/vdev_raidz_math_avx512bw.c b/module/zfs/vdev_raidz_math_avx512bw.c index d605653db3f1..2f545c9ec078 100644 --- a/module/zfs/vdev_raidz_math_avx512bw.c +++ b/module/zfs/vdev_raidz_math_avx512bw.c @@ -393,9 +393,8 @@ DEFINE_REC_METHODS(avx512bw); static boolean_t raidz_will_avx512bw_work(void) { - return (zfs_avx_available() && - zfs_avx512f_available() && - zfs_avx512bw_available()); + return (kfpu_allowed() && zfs_avx_available() && + zfs_avx512f_available() && zfs_avx512bw_available()); } const raidz_impl_ops_t vdev_raidz_avx512bw_impl = { diff --git a/module/zfs/vdev_raidz_math_avx512f.c b/module/zfs/vdev_raidz_math_avx512f.c index f4e4560ced83..75af7a8eea96 100644 --- a/module/zfs/vdev_raidz_math_avx512f.c +++ b/module/zfs/vdev_raidz_math_avx512f.c @@ -470,9 +470,8 @@ DEFINE_REC_METHODS(avx512f); static boolean_t raidz_will_avx512f_work(void) { - return (zfs_avx_available() && - zfs_avx2_available() && - zfs_avx512f_available()); + return (kfpu_allowed() && zfs_avx_available() && + zfs_avx2_available() && zfs_avx512f_available()); } const raidz_impl_ops_t vdev_raidz_avx512f_impl = { diff --git a/module/zfs/vdev_raidz_math_scalar.c b/module/zfs/vdev_raidz_math_scalar.c index a693bff63ffb..cd742e146ca6 100644 --- a/module/zfs/vdev_raidz_math_scalar.c +++ b/module/zfs/vdev_raidz_math_scalar.c @@ -142,6 +142,7 @@ static const struct { a.b[6] = mul_lt[a.b[6]]; \ a.b[5] = mul_lt[a.b[5]]; \ a.b[4] = mul_lt[a.b[4]]; \ + /* falls through */ \ case 4: \ a.b[3] = mul_lt[a.b[3]]; \ a.b[2] = mul_lt[a.b[2]]; \ diff --git a/module/zfs/vdev_raidz_math_sse2.c b/module/zfs/vdev_raidz_math_sse2.c index 9985da273643..5b3a9385c9d8 100644 --- a/module/zfs/vdev_raidz_math_sse2.c +++ b/module/zfs/vdev_raidz_math_sse2.c @@ -607,7 +607,7 @@ DEFINE_REC_METHODS(sse2); static boolean_t raidz_will_sse2_work(void) { - return (zfs_sse_available() && zfs_sse2_available()); + return (kfpu_allowed() && zfs_sse_available() && zfs_sse2_available()); } const raidz_impl_ops_t vdev_raidz_sse2_impl = { diff --git a/module/zfs/vdev_raidz_math_ssse3.c b/module/zfs/vdev_raidz_math_ssse3.c index 047a48d544f1..62247cf8eb8d 100644 --- a/module/zfs/vdev_raidz_math_ssse3.c +++ b/module/zfs/vdev_raidz_math_ssse3.c @@ -399,8 +399,8 @@ DEFINE_REC_METHODS(ssse3); static boolean_t raidz_will_ssse3_work(void) { - return (zfs_sse_available() && zfs_sse2_available() && - zfs_ssse3_available()); + return (kfpu_allowed() && zfs_sse_available() && + zfs_sse2_available() && zfs_ssse3_available()); } const raidz_impl_ops_t 
vdev_raidz_ssse3_impl = { diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index f2d18d9257bd..340de255720d 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -21,7 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. */ #include @@ -100,6 +101,8 @@ int zfs_remove_max_copy_bytes = 64 * 1024 * 1024; * removing a device. This can be no larger than SPA_MAXBLOCKSIZE. If * there is a performance problem with attempting to allocate large blocks, * consider decreasing this. + * + * See also the accessor function spa_remove_max_segment(). */ int zfs_remove_max_segment = SPA_MAXBLOCKSIZE; @@ -951,8 +954,10 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, vdev_indirect_mapping_entry_t *entry; dva_t dst = {{ 0 }}; uint64_t start = range_tree_min(segs); + ASSERT0(P2PHASE(start, 1 << spa->spa_min_ashift)); ASSERT3U(maxalloc, <=, SPA_MAXBLOCKSIZE); + ASSERT0(P2PHASE(maxalloc, 1 << spa->spa_min_ashift)); uint64_t size = range_tree_span(segs); if (range_tree_span(segs) > maxalloc) { @@ -983,6 +988,7 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, } } ASSERT3U(size, <=, maxalloc); + ASSERT0(P2PHASE(size, 1 << spa->spa_min_ashift)); /* * An allocation class might not have any remaining vdevs or space @@ -1026,11 +1032,11 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, /* * We can't have any padding of the allocated size, otherwise we will - * misunderstand what's allocated, and the size of the mapping. - * The caller ensures this will be true by passing in a size that is - * aligned to the worst (highest) ashift in the pool. + * misunderstand what's allocated, and the size of the mapping. We + * prevent padding by ensuring that all devices in the pool have the + * same ashift, and the allocation size is a multiple of the ashift. */ - ASSERT3U(DVA_GET_ASIZE(&dst), ==, size); + VERIFY3U(DVA_GET_ASIZE(&dst), ==, size); entry = kmem_zalloc(sizeof (vdev_indirect_mapping_entry_t), KM_SLEEP); DVA_MAPPING_SET_SRC_OFFSET(&entry->vime_mapping, start); @@ -1363,6 +1369,20 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, range_tree_destroy(segs); } +/* + * The size of each removal mapping is limited by the tunable + * zfs_remove_max_segment, but we must adjust this to be a multiple of the + * pool's ashift, so that we don't try to split individual sectors regardless + * of the tunable value. (Note that device removal requires that all devices + * have the same ashift, so there's no difference between spa_min_ashift and + * spa_max_ashift.) The raw tunable should not be used elsewhere. + */ +uint64_t +spa_remove_max_segment(spa_t *spa) +{ + return (P2ROUNDUP(zfs_remove_max_segment, 1 << spa->spa_max_ashift)); +} + /* * The removal thread operates in open context. It iterates over all * allocated space in the vdev, by loading each metaslab's spacemap. 
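The new spa_remove_max_segment() accessor above rounds the raw zfs_remove_max_segment tunable up to a multiple of the pool's ashift, so the copy logic never tries to split a sector; as the comment notes, the raw tunable should not be used elsewhere. A small userland check of the arithmetic, with a power-of-two round-up equivalent to the P2ROUNDUP macro used in the hunk and a hypothetical tunable value:

#include <stdint.h>
#include <stdio.h>

#define	ROUNDUP_P2(x, align)	((((x) - 1) | ((align) - 1)) + 1)

int
main(void)
{
	uint64_t tunable = 1000000;	/* hypothetical zfs_remove_max_segment */
	uint64_t ashift = 12;		/* 4 KiB sectors */
	uint64_t seg = ROUNDUP_P2(tunable, 1ULL << ashift);

	/* Prints 1003520, i.e. 245 whole sectors of 4096 bytes. */
	printf("%llu\n", (unsigned long long)seg);
	return (0);
}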
@@ -1385,7 +1405,7 @@ spa_vdev_remove_thread(void *arg) spa_t *spa = arg; spa_vdev_removal_t *svr = spa->spa_vdev_removal; vdev_copy_arg_t vca; - uint64_t max_alloc = zfs_remove_max_segment; + uint64_t max_alloc = spa_remove_max_segment(spa); uint64_t last_txg = 0; spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); @@ -1498,7 +1518,7 @@ spa_vdev_remove_thread(void *arg) dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); - dmu_tx_hold_space(tx, SPA_MAXBLOCKSIZE); + VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); uint64_t txg = dmu_tx_get_txg(tx); @@ -1511,7 +1531,7 @@ spa_vdev_remove_thread(void *arg) vd = vdev_lookup_top(spa, svr->svr_vdev_id); if (txg != last_txg) - max_alloc = zfs_remove_max_segment; + max_alloc = spa_remove_max_segment(spa); last_txg = txg; spa_vdev_copy_impl(vd, svr, &vca, &max_alloc, tx); @@ -1841,6 +1861,13 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) spa_vdev_config_exit(spa, NULL, *txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); + /* + * Cancel any initialize or TRIM which was in progress. + */ + vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED); + vdev_trim_stop_all(vd, VDEV_TRIM_CANCELED); + vdev_autotrim_stop_wait(vd); + /* * Evacuate the device. We don't hold the config lock as * writer since we need to do I/O but we do keep the @@ -1871,12 +1898,6 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) vdev_metaslab_fini(vd); spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG); - - /* Stop initializing and TRIM */ - vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED); - vdev_trim_stop_all(vd, VDEV_TRIM_CANCELED); - vdev_autotrim_stop_wait(vd); - *txg = spa_vdev_config_enter(spa); sysevent_t *ev = spa_event_create(spa, vd, NULL, @@ -1917,6 +1938,9 @@ spa_vdev_remove_top_check(vdev_t *vd) if (vd != vd->vdev_top) return (SET_ERROR(ENOTSUP)); + if (!vdev_is_concrete(vd)) + return (SET_ERROR(ENOTSUP)); + if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REMOVAL)) return (SET_ERROR(ENOTSUP)); @@ -2113,7 +2137,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) int error = 0, error_log; boolean_t locked = MUTEX_HELD(&spa_namespace_lock); sysevent_t *ev = NULL; - char *vd_type = NULL, *vd_path = NULL, *vd_path_log = NULL; + char *vd_type = NULL, *vd_path = NULL; ASSERT(spa_writeable(spa)); @@ -2148,7 +2172,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) ESC_ZFS_VDEV_REMOVE_AUX); vd_type = VDEV_TYPE_SPARE; - vd_path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + vd_path = spa_strdup(fnvlist_lookup_string( + nv, ZPOOL_CONFIG_PATH)); spa_vdev_remove_aux(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares, nv); spa_load_spares(spa); @@ -2161,7 +2186,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { vd_type = VDEV_TYPE_L2CACHE; - vd_path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + vd_path = spa_strdup(fnvlist_lookup_string( + nv, ZPOOL_CONFIG_PATH)); /* * Cache devices can always be removed. */ @@ -2174,7 +2200,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) } else if (vd != NULL && vd->vdev_islog) { ASSERT(!locked); vd_type = VDEV_TYPE_LOG; - vd_path = (vd->vdev_path != NULL) ? vd->vdev_path : "-"; + vd_path = spa_strdup((vd->vdev_path != NULL) ? 
+ vd->vdev_path : "-"); error = spa_vdev_remove_log(vd, &txg); } else if (vd != NULL) { ASSERT(!locked); @@ -2186,9 +2213,6 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) error = SET_ERROR(ENOENT); } - if (vd_path != NULL) - vd_path_log = spa_strdup(vd_path); - error_log = error; if (!locked) @@ -2201,12 +2225,12 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) * Doing that would prevent the txg sync from actually happening, * causing a deadlock. */ - if (error_log == 0 && vd_type != NULL && vd_path_log != NULL) { + if (error_log == 0 && vd_type != NULL && vd_path != NULL) { spa_history_log_internal(spa, "vdev remove", NULL, - "%s vdev (%s) %s", spa_name(spa), vd_type, vd_path_log); + "%s vdev (%s) %s", spa_name(spa), vd_type, vd_path); } - if (vd_path_log != NULL) - spa_strfree(vd_path_log); + if (vd_path != NULL) + spa_strfree(vd_path); if (ev != NULL) spa_event_post(ev); diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index e40b7ce8e4e8..7170f7013608 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -140,17 +140,17 @@ vdev_root_state_change(vdev_t *vd, int faulted, int degraded) } vdev_ops_t vdev_root_ops = { - vdev_root_open, - vdev_root_close, - vdev_default_asize, - NULL, /* io_start - not applicable to the root */ - NULL, /* io_done - not applicable to the root */ - vdev_root_state_change, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_ROOT, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_root_open, + .vdev_op_close = vdev_root_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = NULL, /* not applicable to the root */ + .vdev_op_io_done = NULL, /* not applicable to the root */ + .vdev_op_state_change = vdev_root_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_ROOT, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 6d8c498042c9..30f62ac43b62 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -49,6 +49,36 @@ #include #include +/* + * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object + * (all leaf blocks) when we start iterating over it. + * + * For zap_cursor_init(), the callers all intend to iterate through all the + * entries. There are a few cases where an error (typically i/o error) could + * cause it to bail out early. + * + * For zap_cursor_init_serialized(), there are callers that do the iteration + * outside of ZFS. Typically they would iterate over everything, but we + * don't have control of that. E.g. zfs_ioc_snapshot_list_next(), + * zcp_snapshots_iter(), and other iterators over things in the MOS - these + * are called by /sbin/zfs and channel programs. The other example is + * zfs_readdir() which iterates over directory entries for the getdents() + * syscall. /sbin/ls iterates to the end (unless it receives a signal), but + * userland doesn't have to. + * + * Given that the ZAP entries aren't returned in a specific order, the only + * legitimate use cases for partial iteration would be: + * + * 1. Pagination: e.g. 
you only want to display 100 entries at a time, so you + * get the first 100 and then wait for the user to hit "next page", which + * they may never do). + * + * 2. You want to know if there are more than X entries, without relying on + * the zfs-specific implementation of the directory's st_size (which is + * the number of entries). + */ +int zap_iterate_prefetch = B_TRUE; + int fzap_default_block_shift = 14; /* 16k blocksize */ extern inline zap_phys_t *zap_f_phys(zap_t *zap); @@ -1189,6 +1219,21 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) /* retrieve the next entry at or after zc_hash/zc_cd */ /* if no entry, return ENOENT */ + /* + * If we are reading from the beginning, we're almost certain to + * iterate over the entire ZAP object. If there are multiple leaf + * blocks (freeblk > 2), prefetch the whole object (up to + * dmu_prefetch_max bytes), so that we read the leaf blocks + * concurrently. (Unless noprefetch was requested via + * zap_cursor_init_noprefetch()). + */ + if (zc->zc_hash == 0 && zap_iterate_prefetch && + zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) { + dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0, + zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap), + ZIO_PRIORITY_ASYNC_READ); + } + if (zc->zc_leaf && (ZAP_HASH_IDX(zc->zc_hash, zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) != @@ -1333,3 +1378,12 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs) } } } + +#if defined(_KERNEL) +/* BEGIN CSTYLED */ +module_param(zap_iterate_prefetch, int, 0644); +MODULE_PARM_DESC(zap_iterate_prefetch, + "When iterating ZAP object, prefetch it"); + +/* END CSTYLED */ +#endif diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index fa369f797548..467812ff637c 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2017 Nexenta Systems, Inc. */ @@ -1472,9 +1472,9 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, * Routines for iterating over the attributes. */ -void -zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, - uint64_t serialized) +static void +zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized, boolean_t prefetch) { zc->zc_objset = os; zc->zc_zap = NULL; @@ -1483,12 +1483,33 @@ zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, zc->zc_serialized = serialized; zc->zc_hash = 0; zc->zc_cd = 0; + zc->zc_prefetch = prefetch; +} +void +zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized) +{ + zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE); } +/* + * Initialize a cursor at the beginning of the ZAP object. The entire + * ZAP object will be prefetched. + */ void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { - zap_cursor_init_serialized(zc, os, zapobj, 0); + zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE); +} + +/* + * Initialize a cursor at the beginning, but request that we not prefetch + * the entire ZAP object. 
+ */ +void +zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) +{ + zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE); } void diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 4894df11d5fb..44e4d230a30f 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -66,7 +66,7 @@ * consuming excessive system or running forever. If one of these limits is * hit, the channel program will be stopped immediately and return from * zcp_eval() with an error code. No attempt will be made to roll back or undo - * any changes made by the channel program before the error occured. + * any changes made by the channel program before the error occurred. * Consumers invoking zcp_eval() from elsewhere in the kernel may pass a time * limit of 0, disabling the time limit. * @@ -77,7 +77,7 @@ * In place of a return value, an error message will also be returned in the * 'result' nvlist containing information about the error. No attempt will be * made to roll back or undo any changes made by the channel program before the - * error occured. + * error occurred. * * 3. If an error occurs inside a ZFS library call which returns an error code, * the error is returned to the Lua script to be handled as desired. @@ -118,21 +118,6 @@ static int zcp_nvpair_value_to_lua(lua_State *, nvpair_t *, char *, int); static int zcp_lua_to_nvlist_impl(lua_State *, int, nvlist_t *, const char *, int); -typedef struct zcp_alloc_arg { - boolean_t aa_must_succeed; - int64_t aa_alloc_remaining; - int64_t aa_alloc_limit; -} zcp_alloc_arg_t; - -typedef struct zcp_eval_arg { - lua_State *ea_state; - zcp_alloc_arg_t *ea_allocargs; - cred_t *ea_cred; - nvlist_t *ea_outnvl; - int ea_result; - uint64_t ea_instrlimit; -} zcp_eval_arg_t; - /* * The outer-most error callback handler for use with lua_pcall(). On * error Lua will call this callback with a single argument that @@ -175,7 +160,7 @@ zcp_argerror(lua_State *state, int narg, const char *msg, ...) * of a function call. * * If an error occurs, the cleanup function will be invoked exactly once and - * then unreigstered. + * then unregistered. * * Returns the registered cleanup handler so the caller can deregister it * if no error occurs. @@ -452,7 +437,7 @@ zcp_lua_to_nvlist_helper(lua_State *state) static void zcp_convert_return_values(lua_State *state, nvlist_t *nvl, - const char *key, zcp_eval_arg_t *evalargs) + const char *key, int *result) { int err; VERIFY3U(1, ==, lua_gettop(state)); @@ -464,7 +449,7 @@ zcp_convert_return_values(lua_State *state, nvlist_t *nvl, err = lua_pcall(state, 3, 0, 0); /* zcp_lua_to_nvlist_helper */ if (err != 0) { zcp_lua_to_nvlist(state, 1, nvl, ZCP_RET_ERROR); - evalargs->ea_result = SET_ERROR(ECHRNG); + *result = SET_ERROR(ECHRNG); } } @@ -791,19 +776,32 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize) static void zcp_lua_counthook(lua_State *state, lua_Debug *ar) { - /* - * If we're called, check how many instructions the channel program has - * executed so far, and compare against the limit. 
- */ lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); zcp_run_info_t *ri = lua_touserdata(state, -1); + /* + * Check if we were canceled while waiting for the + * txg to sync or from our open context thread + */ + if (ri->zri_canceled || + (!ri->zri_sync && issig(JUSTLOOKING) && issig(FORREAL))) { + ri->zri_canceled = B_TRUE; + (void) lua_pushstring(state, "Channel program was canceled."); + (void) lua_error(state); + /* Unreachable */ + } + + /* + * Check how many instructions the channel program has + * executed so far, and compare against the limit. + */ ri->zri_curinstrs += zfs_lua_check_instrlimit_interval; if (ri->zri_maxinstrs != 0 && ri->zri_curinstrs > ri->zri_maxinstrs) { ri->zri_timed_out = B_TRUE; (void) lua_pushstring(state, "Channel program timed out."); (void) lua_error(state); + /* Unreachable */ } } @@ -816,31 +814,25 @@ zcp_panic_cb(lua_State *state) } static void -zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) +zcp_eval_impl(dmu_tx_t *tx, zcp_run_info_t *ri) { int err; - zcp_run_info_t ri; - lua_State *state = evalargs->ea_state; + lua_State *state = ri->zri_state; VERIFY3U(3, ==, lua_gettop(state)); + /* finish initializing our runtime state */ + ri->zri_pool = dmu_tx_pool(tx); + ri->zri_tx = tx; + list_create(&ri->zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), + offsetof(zcp_cleanup_handler_t, zch_node)); + /* * Store the zcp_run_info_t struct for this run in the Lua registry. * Registry entries are not directly accessible by the Lua scripts but * can be accessed by our callbacks. */ - ri.zri_space_used = 0; - ri.zri_pool = dmu_tx_pool(tx); - ri.zri_cred = evalargs->ea_cred; - ri.zri_tx = tx; - ri.zri_timed_out = B_FALSE; - ri.zri_sync = sync; - list_create(&ri.zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), - offsetof(zcp_cleanup_handler_t, zch_node)); - ri.zri_curinstrs = 0; - ri.zri_maxinstrs = evalargs->ea_instrlimit; - - lua_pushlightuserdata(state, &ri); + lua_pushlightuserdata(state, ri); lua_setfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); VERIFY3U(3, ==, lua_gettop(state)); @@ -857,7 +849,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * off control to the channel program. Channel programs that use too * much memory should die with ENOSPC. */ - evalargs->ea_allocargs->aa_must_succeed = B_FALSE; + ri->zri_allocargs->aa_must_succeed = B_FALSE; /* * Call the Lua function that open-context passed us. This pops the @@ -869,14 +861,14 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) /* * Let Lua use KM_SLEEP while we interpret the return values. */ - evalargs->ea_allocargs->aa_must_succeed = B_TRUE; + ri->zri_allocargs->aa_must_succeed = B_TRUE; /* * Remove the error handler callback from the stack. At this point, * there shouldn't be any cleanup handler registered in the handler * list (zri_cleanup_handlers), regardless of whether it ran or not. 
*/ - list_destroy(&ri.zri_cleanup_handlers); + list_destroy(&ri->zri_cleanup_handlers); lua_remove(state, 1); switch (err) { @@ -896,16 +888,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) int return_count = lua_gettop(state); if (return_count == 1) { - evalargs->ea_result = 0; - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_RETURN, evalargs); + ri->zri_result = 0; + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_RETURN, &ri->zri_result); } else if (return_count > 1) { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); lua_settop(state, 0); (void) lua_pushfstring(state, "Multiple return " "values not supported"); - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); } break; } @@ -919,19 +911,20 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * stack. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); - if (evalargs->ea_result == ETIME && - evalargs->ea_outnvl != NULL) { - (void) nvlist_add_uint64(evalargs->ea_outnvl, - ZCP_ARG_INSTRLIMIT, ri.zri_curinstrs); + if (ri->zri_result == ETIME && ri->zri_outnvl != NULL) { + (void) nvlist_add_uint64(ri->zri_outnvl, + ZCP_ARG_INSTRLIMIT, ri->zri_curinstrs); } break; } @@ -943,14 +936,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * return the error message. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); break; } case LUA_ERRMEM: @@ -958,7 +953,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * Lua ran out of memory while running the channel program. * There's not much we can do. 
*/ - evalargs->ea_result = SET_ERROR(ENOSPC); + ri->zri_result = SET_ERROR(ENOSPC); break; default: VERIFY0(err); @@ -966,21 +961,35 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) } static void -zcp_pool_error(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_pool_error(zcp_run_info_t *ri, const char *poolname) { - evalargs->ea_result = SET_ERROR(ECHRNG); - lua_settop(evalargs->ea_state, 0); - (void) lua_pushfstring(evalargs->ea_state, "Could not open pool: %s", + ri->zri_result = SET_ERROR(ECHRNG); + lua_settop(ri->zri_state, 0); + (void) lua_pushfstring(ri->zri_state, "Could not open pool: %s", poolname); - zcp_convert_return_values(evalargs->ea_state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(ri->zri_state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); + +} + +/* + * This callback is called when txg_wait_synced_sig encountered a signal. + * The txg_wait_synced_sig will continue to wait for the txg to complete + * after calling this callback. + */ +/* ARGSUSED */ +static void +zcp_eval_sig(void *arg, dmu_tx_t *tx) +{ + zcp_run_info_t *ri = arg; + ri->zri_canceled = B_TRUE; } static void zcp_eval_sync(void *arg, dmu_tx_t *tx) { - zcp_eval_arg_t *evalargs = arg; + zcp_run_info_t *ri = arg; /* * Open context should have setup the stack to contain: @@ -988,15 +997,14 @@ zcp_eval_sync(void *arg, dmu_tx_t *tx) * 2: Script to run (converted to a Lua function) * 3: nvlist input to function (converted to Lua table or nil) */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); - zcp_eval_impl(tx, B_TRUE, evalargs); + zcp_eval_impl(tx, ri); } static void -zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_eval_open(zcp_run_info_t *ri, const char *poolname) { - int error; dsl_pool_t *dp; dmu_tx_t *tx; @@ -1004,11 +1012,11 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) /* * See comment from the same assertion in zcp_eval_sync(). 
*/ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); error = dsl_pool_hold(poolname, FTAG, &dp); if (error != 0) { - zcp_pool_error(evalargs, poolname); + zcp_pool_error(ri, poolname); return; } @@ -1023,7 +1031,7 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) */ tx = dmu_tx_create_dd(dp->dp_mos_dir); - zcp_eval_impl(tx, B_FALSE, evalargs); + zcp_eval_impl(tx, ri); dmu_tx_abort(tx); @@ -1036,7 +1044,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, { int err; lua_State *state; - zcp_eval_arg_t evalargs; + zcp_run_info_t runinfo; if (instrlimit > zfs_lua_max_instrlimit) return (SET_ERROR(EINVAL)); @@ -1136,24 +1144,29 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, } VERIFY3U(3, ==, lua_gettop(state)); - evalargs.ea_state = state; - evalargs.ea_allocargs = &allocargs; - evalargs.ea_instrlimit = instrlimit; - evalargs.ea_cred = CRED(); - evalargs.ea_outnvl = outnvl; - evalargs.ea_result = 0; + runinfo.zri_state = state; + runinfo.zri_allocargs = &allocargs; + runinfo.zri_outnvl = outnvl; + runinfo.zri_result = 0; + runinfo.zri_cred = CRED(); + runinfo.zri_timed_out = B_FALSE; + runinfo.zri_canceled = B_FALSE; + runinfo.zri_sync = sync; + runinfo.zri_space_used = 0; + runinfo.zri_curinstrs = 0; + runinfo.zri_maxinstrs = instrlimit; if (sync) { - err = dsl_sync_task(poolname, NULL, - zcp_eval_sync, &evalargs, 0, ZFS_SPACE_CHECK_ZCP_EVAL); + err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync, + zcp_eval_sig, &runinfo, 0, ZFS_SPACE_CHECK_ZCP_EVAL); if (err != 0) - zcp_pool_error(&evalargs, poolname); + zcp_pool_error(&runinfo, poolname); } else { - zcp_eval_open(&evalargs, poolname); + zcp_eval_open(&runinfo, poolname); } lua_close(state); - return (evalargs.ea_result); + return (runinfo.zri_result); } /* diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c index ed98f0d1025b..42c125d48cd0 100644 --- a/module/zfs/zcp_get.c +++ b/module/zfs/zcp_get.c @@ -423,13 +423,11 @@ get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname, case ZFS_PROP_RECEIVE_RESUME_TOKEN: { char *token = get_receive_resume_stats_impl(ds); - VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN), - <, ZAP_MAXVALUELEN); + (void) strlcpy(strval, token, ZAP_MAXVALUELEN); if (strcmp(strval, "") == 0) { char *childval = get_child_receive_stats(ds); - VERIFY3U(strlcpy(strval, childval, ZAP_MAXVALUELEN), - <, ZAP_MAXVALUELEN); + (void) strlcpy(strval, childval, ZAP_MAXVALUELEN); if (strcmp(strval, "") == 0) error = ENOENT; @@ -549,7 +547,7 @@ get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop) error = dsl_prop_get_ds(ds, prop_name, sizeof (numval), 1, &numval, setpoint); - /* Fill in temorary value for prop, if applicable */ + /* Fill in temporary value for prop, if applicable */ (void) get_temporary_prop(ds, zfs_prop, &numval, setpoint); /* Push value to lua stack */ @@ -680,7 +678,7 @@ parse_userquota_prop(const char *prop_name, zfs_userquota_prop_t *type, if (strncmp(cp, "S-1-", 4) == 0) { /* * It's a numeric SID (eg "S-1-234-567-89") and we want to - * seperate the domain id and the rid + * separate the domain id and the rid */ int domain_len = strrchr(cp, '-') - cp; domain_val = kmem_alloc(domain_len + 1, KM_SLEEP); diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c index f26445520711..d6e0b542175f 100644 --- a/module/zfs/zcp_iter.c +++ b/module/zfs/zcp_iter.c @@ -435,7 +435,7 @@ static zcp_list_info_t zcp_system_props_list_info = { }; /* - * Get a list of all visble 
properties and their values for a given dataset. + * Get a list of all visible properties and their values for a given dataset. * Returned on the stack as a Lua table. */ static int diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c index b1af4da2f4a5..26af91e27d42 100644 --- a/module/zfs/zfs_acl.c +++ b/module/zfs/zfs_acl.c @@ -810,7 +810,7 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) * for zfs_copy_ace_2_fuid(). * * We only convert an ACL once, so this won't happen - * everytime. + * every time. */ oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, KM_SLEEP); diff --git a/module/zfs/zfs_byteswap.c b/module/zfs/zfs_byteswap.c index 7893bde4e2db..1b8bb82c3fbc 100644 --- a/module/zfs/zfs_byteswap.c +++ b/module/zfs/zfs_byteswap.c @@ -44,7 +44,7 @@ zfs_oldace_byteswap(ace_t *ace, int ace_cnt) } /* - * swap ace_t and ace_oject_t + * swap ace_t and ace_object_t */ void zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) @@ -70,7 +70,7 @@ zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) * larger than needed to hold the aces * present. As long as we do not do any * swapping beyond the end of our block we are - * okay. It it safe to swap any non-ace data + * okay. It is safe to swap any non-ace data * within the block since it is just zeros. */ if (ptr + sizeof (zfs_ace_hdr_t) > end) { diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 46e6e19b91d5..3b2a6eb8273f 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -30,6 +30,7 @@ * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2018 George Melikov. All Rights Reserved. + * Copyright (c) 2019 Datto, Inc. All rights reserved. 
*/ /* @@ -85,6 +86,7 @@ #include #include #include +#include #include "zfs_namecheck.h" /* @@ -190,7 +192,7 @@ static void zfsctl_snapshot_add(zfs_snapentry_t *se) { ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); avl_add(&zfs_snapshots_by_name, se); avl_add(&zfs_snapshots_by_objsetid, se); } @@ -267,7 +269,7 @@ zfsctl_snapshot_find_by_name(char *snapname) search.se_name = snapname; se = avl_find(&zfs_snapshots_by_name, &search, NULL); if (se) - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); return (se); } @@ -288,7 +290,7 @@ zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) search.se_objsetid = objsetid; se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); if (se) - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); return (se); } @@ -461,10 +463,14 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_id = id; - zp->z_unlinked = 0; - zp->z_atime_dirty = 0; - zp->z_zn_prefetch = 0; - zp->z_moved = 0; + zp->z_unlinked = B_FALSE; + zp->z_atime_dirty = B_FALSE; + zp->z_zn_prefetch = B_FALSE; + zp->z_moved = B_FALSE; + zp->z_is_sa = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_TRUE; + zp->z_is_stale = B_FALSE; zp->z_sa_hdl = NULL; zp->z_blksz = 0; zp->z_seq = 0; @@ -473,10 +479,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_pflags = 0; zp->z_mode = 0; zp->z_sync_cnt = 0; - zp->z_is_mapped = B_FALSE; - zp->z_is_ctldir = B_TRUE; - zp->z_is_sa = B_FALSE; - zp->z_is_stale = B_FALSE; ip->i_generation = 0; ip->i_ino = id; ip->i_mode = (S_IFDIR | S_IRWXUGO); @@ -594,7 +596,7 @@ zfsctl_root(znode_t *zp) /* * Generate a long fid to indicate a snapdir. We encode whether snapdir is - * already monunted in gen field. We do this because nfsd lookup will not + * already mounted in gen field. We do this because nfsd lookup will not * trigger automount. Next time the nfsd does fh_to_dentry, we will notice * this and do automount and return ESTALE to force nfsd revalidate and follow * mount. 
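In the zfsctl_snapshot_mount() hunk further below, the automount helper's target path is assembled from the filesystem's recorded mount point plus the snapshot dirent name instead of from d_path(), so the path stays valid when mount.zfs(8) runs inside a chroot. A userland sketch of that construction; the function and its arguments are illustrative, not the kernel interface.

#include <stdio.h>
#include <string.h>

static void
snapshot_mountpoint(char *buf, size_t len, const char *mntpoint,
    const char *snapname)
{
	/* Mirrors the snprintf() in the hunk: "<mntpoint>/.zfs/snapshot/<name>" */
	(void) snprintf(buf, len, "%s/.zfs/snapshot/%s",
	    mntpoint != NULL ? mntpoint : "", snapname);
}

int
main(void)
{
	char path[256];

	snapshot_mountpoint(path, sizeof (path), "/tank/fs", "mysnap");
	return (strcmp(path, "/tank/fs/.zfs/snapshot/mysnap") != 0);
}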
@@ -702,37 +704,6 @@ zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, return (0); } -/* - * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" - */ -static int -zfsctl_snapshot_path(struct path *path, int len, char *full_path) -{ - char *path_buffer, *path_ptr; - int path_len, error = 0; - - path_buffer = kmem_alloc(len, KM_SLEEP); - - path_ptr = d_path(path, path_buffer, len); - if (IS_ERR(path_ptr)) { - error = -PTR_ERR(path_ptr); - goto out; - } - - path_len = path_buffer + len - 1 - path_ptr; - if (path_len > len) { - error = SET_ERROR(EFAULT); - goto out; - } - - memcpy(full_path, path_ptr, path_len); - full_path[path_len] = '\0'; -out: - kmem_free(path_buffer, len); - - return (error); -} - /* * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" */ @@ -1047,8 +1018,6 @@ zfsctl_snapshot_unmount(char *snapname, int flags) return (error); } -#define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */ - int zfsctl_snapshot_mount(struct path *path, int flags) { @@ -1078,9 +1047,14 @@ zfsctl_snapshot_mount(struct path *path, int flags) if (error) goto error; - error = zfsctl_snapshot_path(path, MAXPATHLEN, full_path); - if (error) - goto error; + /* + * Construct a mount point path from sb of the ctldir inode and dirent + * name, instead of from d_path(), so that chroot'd process doesn't fail + * on mount.zfs(8). + */ + snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", + zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", + dname(dentry)); /* * Multiple concurrent automounts of a snapshot are never allowed. @@ -1109,8 +1083,8 @@ zfsctl_snapshot_mount(struct path *path, int flags) error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (error) { if (!(error & MOUNT_BUSY << 8)) { - cmn_err(CE_WARN, "Unable to automount %s/%s: %d", - full_path, full_name, error); + zfs_dbgmsg("Unable to automount %s error=%d", + full_path, error); error = SET_ERROR(EISDIR); } else { /* diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 63ac97754d37..6bdad737cd84 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -55,7 +55,7 @@ #include /* - * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups + * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups * of names after deciding which is the appropriate lookup interface. */ static int @@ -232,7 +232,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, /* * Wait until there are no locks on this name. * - * Don't grab the the lock if it is already held. However, cannot + * Don't grab the lock if it is already held. However, cannot * have both ZSHARED and ZHAVELOCK together. 
*/ ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index debe733dab7c..f2e808d6fb20 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1514,6 +1514,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *zplprops = NULL; dsl_crypto_params_t *dcp = NULL; char *spa_name = zc->zc_name; + boolean_t unload_wkey = B_TRUE; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) @@ -1541,11 +1542,8 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); if (nvl) { error = nvlist_dup(nvl, &rootprops, KM_SLEEP); - if (error != 0) { - nvlist_free(config); - nvlist_free(props); - return (error); - } + if (error != 0) + goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } @@ -1553,11 +1551,8 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) &hidden_args); error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, rootprops, hidden_args, &dcp); - if (error != 0) { - nvlist_free(config); - nvlist_free(props); - return (error); - } + if (error != 0) + goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); @@ -1577,15 +1572,17 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) * Set the remaining root properties */ if (!error && (error = zfs_set_prop_nvlist(spa_name, - ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) + ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) { (void) spa_destroy(spa_name); + unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */ + } pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); - dsl_crypto_params_free(dcp, !!error); + dsl_crypto_params_free(dcp, unload_wkey && !!error); return (error); } @@ -2114,7 +2111,7 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... - * XXX reading with out owning + * XXX reading without owning */ if (!zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZVOL) { @@ -2747,10 +2744,9 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, * Check that all the properties are valid user properties. 
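Editor's note: the zfs_ioc_pool_create() hunks above collapse the duplicated free-and-return sequences into the single pool_props_bad exit path, and add unload_wkey so dsl_crypto_params_free() does not unload a wrapping key that spa_destroy() has already released. A compact userspace model of that single-exit idiom; the error numbers and names below are placeholders, not the ioctl code itself:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int
    create_with_cleanup(bool fail_props, bool fail_destroy_path)
    {
        int error = 0;
        bool unload_key = true;    /* assume we still own the key */
        char *config = malloc(16);
        char *props = malloc(16);

        if (fail_props) {
            error = 22;    /* EINVAL */
            goto out;      /* one label frees everything */
        }
        if (fail_destroy_path) {
            error = 5;     /* EIO */
            /* teardown already released the key; don't do it twice */
            unload_key = false;
        }
    out:
        free(config);
        free(props);
        if (unload_key && error != 0)
            printf("unloading wrapping key\n");
        return (error);
    }

    int
    main(void)
    {
        create_with_cleanup(true, false);
        create_with_cleanup(false, true);
        return (0);
    }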
*/ static int -zfs_check_userprops(const char *fsname, nvlist_t *nvl) +zfs_check_userprops(nvlist_t *nvl) { nvpair_t *pair = NULL; - int error = 0; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); @@ -2759,10 +2755,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl) nvpair_type(pair) != DATA_TYPE_STRING) return (SET_ERROR(EINVAL)); - if ((error = zfs_secpolicy_write_perms(fsname, - ZFS_DELEG_PERM_USERPROP, CRED()))) - return (error); - if (strlen(propname) >= ZAP_MAXNAMELEN) return (SET_ERROR(ENAMETOOLONG)); @@ -3476,19 +3468,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *pair; (void) nvlist_lookup_nvlist(innvl, "props", &props); - if ((error = zfs_check_userprops(poolname, props)) != 0) - return (error); - if (!nvlist_empty(props) && zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) return (SET_ERROR(ENOTSUP)); + if ((error = zfs_check_userprops(props)) != 0) + return (error); snaps = fnvlist_lookup_nvlist(innvl, "snaps"); poollen = strlen(poolname); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); - const char *cp = strchr(name, '@'); + char *cp = strchr(name, '@'); /* * The snap name must contain an @, and the part after it must @@ -3505,6 +3496,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); + /* + * Check for permission to set the properties on the fs. + */ + if (!nvlist_empty(props)) { + *cp = '\0'; + error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_USERPROP, CRED()); + *cp = '@'; + if (error != 0) + return (error); + } + /* This must be the only snap of this fs. */ for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { @@ -6874,7 +6877,7 @@ zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec) continue; if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) { - /* at least one non-optionial key is expected here */ + /* at least one non-optional key is expected here */ if (!required_keys_found) return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED)); continue; @@ -7113,7 +7116,8 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); - error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); + error = ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), + flag); if (error != 0) { error = SET_ERROR(EFAULT); goto out; @@ -7272,7 +7276,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) out: nvlist_free(innvl); - rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); + rc = ddi_copyout(zc, (void *)(uintptr_t)arg, sizeof (zfs_cmd_t), flag); if (error == 0 && rc != 0) error = SET_ERROR(EFAULT); if (error == 0 && vec->zvec_allow_log) { @@ -7383,13 +7387,6 @@ _init(void) { int error; - error = -vn_set_pwd("/"); - if (error) { - printk(KERN_NOTICE - "ZFS: Warning unable to set pwd to '/': %d\n", error); - return (error); - } - if ((error = -zvol_init()) != 0) return (error); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 15c396ce0329..41b663b65fb8 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 by Delphix. All rights reserved. + * Copyright (c) 2015, 2018 by Delphix. All rights reserved. 
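Editor's note: the zfs_ioc_snapshot() hunk above moves the user-property delegation check out of zfs_check_userprops() and into the per-snapshot loop, where the filesystem name is recovered by temporarily writing a NUL over the '@'; that in-place write is also why cp loses its const qualifier. A standalone sketch of the split-check-restore step, where check_userprop_perm() is a stand-in for zfs_secpolicy_write_perms():

    #include <stdio.h>
    #include <string.h>

    /* Stand-in for zfs_secpolicy_write_perms(); always grants here. */
    static int
    check_userprop_perm(const char *fsname)
    {
        printf("checking userprop permission on %s\n", fsname);
        return (0);
    }

    /*
     * Temporarily terminate the string at '@' so the permission check
     * sees only the filesystem name, then restore the full snapshot
     * name. The buffer must be writable.
     */
    static int
    check_snap_perm(char *name)
    {
        char *cp = strchr(name, '@');
        int error;

        if (cp == NULL)
            return (22);    /* EINVAL: not a snapshot name */
        *cp = '\0';
        error = check_userprop_perm(name);
        *cp = '@';
        return (error);
    }

    int
    main(void)
    {
        char name[] = "tank/fs@today";

        return (check_snap_perm(name));
    }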
*/ @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -380,12 +381,14 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +void zil_remove_async(zilog_t *zilog, uint64_t oid); + /* * Handles both TX_REMOVE and TX_RMDIR transactions. */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid) + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked) { itx_t *itx; lr_remove_t *lr; @@ -401,6 +404,17 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, itx->itx_oid = foid; + /* + * Object ids can be re-instantiated in the next txg so + * remove any async transactions to avoid future leaks. + * This can happen if a fsync occurs on the re-instantiated + * object for a WR_INDIRECT or WR_NEED_COPY write, which gets + * the new file data and flushes a write record for the old object. + */ + if (unlinked) { + ASSERT((txtype & ~TX_CI) == TX_REMOVE); + zil_remove_async(zilog, foid); + } zil_itx_assign(zilog, itx, tx); } @@ -497,6 +511,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t resid, int ioflag, zil_callback_t callback, void *callback_data) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); uint32_t blocksize = zp->z_blksz; itx_wr_state_t write_state; uintptr_t fsync_cnt; @@ -528,7 +543,14 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx_wr_state_t wr_state = write_state; ssize_t len = resid; - if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA) + /* + * A WR_COPIED record must fit entirely in one log block. + * Large writes can use WR_NEED_COPY, which the ZIL will + * split into multiple records across several log blocks + * if necessary. + */ + if (wr_state == WR_COPIED && + resid > zil_max_copied_data(zilog)) wr_state = WR_NEED_COPY; else if (wr_state == WR_INDIRECT) len = MIN(blocksize - P2PHASE(off, blocksize), resid); @@ -536,13 +558,16 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx = zil_itx_create(txtype, sizeof (*lr) + (wr_state == WR_COPIED ? 
len : 0)); lr = (lr_write_t *)&itx->itx_lr; - if (wr_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os, - zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { + + DB_DNODE_ENTER(db); + if (wr_state == WR_COPIED && dmu_read_by_dnode(DB_DNODE(db), + off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { zil_itx_destroy(itx); itx = zil_itx_create(txtype, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; wr_state = WR_NEED_COPY; } + DB_DNODE_EXIT(db); itx->itx_wr_state = wr_state; lr->lr_foid = zp->z_id; diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 144381769059..7dea85bb6614 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -337,8 +337,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto bail; if (lr->lr_common.lrc_txtype & TX_CI) @@ -473,8 +473,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, objid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto out; if (lr->lr_common.lrc_txtype & TX_CI) diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index d514a4fc7753..94203a40c582 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -104,7 +104,7 @@ * Locks are ordered on the start offset of the range. */ static int -rangelock_compare(const void *arg1, const void *arg2) +zfs_rangelock_compare(const void *arg1, const void *arg2) { const locked_range_t *rl1 = (const locked_range_t *)arg1; const locked_range_t *rl2 = (const locked_range_t *)arg2; @@ -118,17 +118,17 @@ rangelock_compare(const void *arg1, const void *arg2) * and may increase the range that's locked for RL_WRITER. */ void -rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg) +zfs_rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg) { mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL); - avl_create(&rl->rl_tree, rangelock_compare, + avl_create(&rl->rl_tree, zfs_rangelock_compare, sizeof (locked_range_t), offsetof(locked_range_t, lr_node)); rl->rl_cb = cb; rl->rl_arg = arg; } void -rangelock_fini(rangelock_t *rl) +zfs_rangelock_fini(rangelock_t *rl) { mutex_destroy(&rl->rl_lock); avl_destroy(&rl->rl_tree); @@ -138,7 +138,7 @@ rangelock_fini(rangelock_t *rl) * Check if a write lock can be grabbed, or wait and recheck until available. */ static void -rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) +zfs_rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) { avl_tree_t *tree = &rl->rl_tree; locked_range_t *lr; @@ -209,7 +209,7 @@ rangelock_enter_writer(rangelock_t *rl, locked_range_t *new) * a proxy and return the proxy. */ static locked_range_t * -rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) +zfs_rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) { locked_range_t *proxy; @@ -241,7 +241,7 @@ rangelock_proxify(avl_tree_t *tree, locked_range_t *lr) * returning the *front* proxy. 
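Editor's note: the zfs_log_write() hunk above keeps the existing fallback shape: build the record as WR_COPIED with the data embedded, and if the dmu read fails, destroy the itx and rebuild it as WR_NEED_COPY so the data is fetched at commit time instead. A toy version of that retry shape; itx_create(), read_data(), and the failure it simulates are all stand-ins:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef enum { WR_COPIED, WR_NEED_COPY } wr_state_t;

    typedef struct itx {
        wr_state_t wr_state;
        size_t payload;    /* bytes embedded in the record */
    } itx_t;

    static itx_t *
    itx_create(wr_state_t ws, size_t len)
    {
        itx_t *itx = malloc(sizeof (*itx));

        itx->wr_state = ws;
        itx->payload = (ws == WR_COPIED) ? len : 0;
        return (itx);
    }

    /* Stand-in for dmu_read_by_dnode(); nonzero means the read failed. */
    static int
    read_data(void *buf, size_t len)
    {
        memset(buf, 0, len);
        return (1);    /* simulate failure to force the fallback */
    }

    int
    main(void)
    {
        size_t len = 512;
        char buf[512];
        itx_t *itx = itx_create(WR_COPIED, len);

        /*
         * Try to embed the data now; on failure, throw the record
         * away and fall back to WR_NEED_COPY, where the data is
         * copied when the log block is committed.
         */
        if (itx->wr_state == WR_COPIED && read_data(buf, len) != 0) {
            free(itx);
            itx = itx_create(WR_NEED_COPY, len);
        }
        printf("state=%s payload=%zu\n",
            itx->wr_state == WR_COPIED ? "WR_COPIED" : "WR_NEED_COPY",
            itx->payload);
        free(itx);
        return (0);
    }

The switch from dmu_read() to dmu_read_by_dnode() under DB_DNODE_ENTER/EXIT avoids a second object lookup while the dnode is already reachable through the SA handle's dbuf.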
*/ static locked_range_t * -rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) +zfs_rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) { ASSERT3U(lr->lr_length, >, 1); ASSERT3U(off, >, lr->lr_offset); @@ -259,7 +259,7 @@ rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) rear->lr_write_wanted = B_FALSE; rear->lr_read_wanted = B_FALSE; - locked_range_t *front = rangelock_proxify(tree, lr); + locked_range_t *front = zfs_rangelock_proxify(tree, lr); front->lr_length = off - lr->lr_offset; avl_insert_here(tree, rear, front, AVL_AFTER); @@ -270,7 +270,7 @@ rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off) * Create and add a new proxy range lock for the supplied range. */ static void -rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) +zfs_rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) { ASSERT(len != 0); locked_range_t *lr = kmem_alloc(sizeof (locked_range_t), KM_SLEEP); @@ -285,7 +285,7 @@ rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) } static void -rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, +zfs_rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, locked_range_t *prev, avl_index_t where) { locked_range_t *next; @@ -307,7 +307,7 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, * convert to proxy if needed then * split this entry and bump ref count */ - prev = rangelock_split(tree, prev, off); + prev = zfs_rangelock_split(tree, prev, off); prev = AVL_NEXT(tree, prev); /* move to rear range */ } } @@ -326,7 +326,7 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, if (off < next->lr_offset) { /* Add a proxy for initial range before the overlap */ - rangelock_new_proxy(tree, off, next->lr_offset - off); + zfs_rangelock_new_proxy(tree, off, next->lr_offset - off); } new->lr_count = 0; /* will use proxies in tree */ @@ -344,30 +344,30 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, /* there's a gap */ ASSERT3U(next->lr_offset, >, prev->lr_offset + prev->lr_length); - rangelock_new_proxy(tree, + zfs_rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length, next->lr_offset - (prev->lr_offset + prev->lr_length)); } if (off + len == next->lr_offset + next->lr_length) { /* exact overlap with end */ - next = rangelock_proxify(tree, next); + next = zfs_rangelock_proxify(tree, next); next->lr_count++; return; } if (off + len < next->lr_offset + next->lr_length) { /* new range ends in the middle of this block */ - next = rangelock_split(tree, next, off + len); + next = zfs_rangelock_split(tree, next, off + len); next->lr_count++; return; } ASSERT3U(off + len, >, next->lr_offset + next->lr_length); - next = rangelock_proxify(tree, next); + next = zfs_rangelock_proxify(tree, next); next->lr_count++; } /* Add the remaining end range. */ - rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length, + zfs_rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length, (off + len) - (prev->lr_offset + prev->lr_length)); } @@ -375,7 +375,7 @@ rangelock_add_reader(avl_tree_t *tree, locked_range_t *new, * Check if a reader lock can be grabbed, or wait and recheck until available. 
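Editor's note: zfs_rangelock_split() above turns one reader range into a front proxy plus a new rear range, and the boundary arithmetic is easy to get wrong by one. Here is a self-contained model of the split, using plain structs instead of the AVL-linked locked_range_t:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct range {
        uint64_t off;
        uint64_t len;
    } range_t;

    /*
     * Split [off, off+len) at 'at' into front and rear pieces,
     * mirroring zfs_rangelock_split(): the original becomes a front
     * proxy and a new rear range is inserted after it.
     */
    static void
    range_split(const range_t *r, uint64_t at, range_t *front,
        range_t *rear)
    {
        assert(at > r->off && at < r->off + r->len);
        front->off = r->off;
        front->len = at - r->off;
        rear->off = at;
        rear->len = r->off + r->len - at;
    }

    int
    main(void)
    {
        range_t r = { 0, 131072 }, front, rear;

        range_split(&r, 4096, &front, &rear);
        printf("front=[%llu,+%llu) rear=[%llu,+%llu)\n",
            (unsigned long long)front.off, (unsigned long long)front.len,
            (unsigned long long)rear.off, (unsigned long long)rear.len);
        return (0);
    }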
*/ static void -rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) +zfs_rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) { avl_tree_t *tree = &rl->rl_tree; locked_range_t *prev, *next; @@ -437,7 +437,7 @@ rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) * Add the read lock, which may involve splitting existing * locks and bumping ref counts (r_count). */ - rangelock_add_reader(tree, new, prev, where); + zfs_rangelock_add_reader(tree, new, prev, where); } /* @@ -448,7 +448,7 @@ rangelock_enter_reader(rangelock_t *rl, locked_range_t *new) * entire file is locked as RL_WRITER). */ locked_range_t * -rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, +zfs_rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, rangelock_type_t type) { ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); @@ -473,9 +473,11 @@ rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, if (avl_numnodes(&rl->rl_tree) == 0) avl_add(&rl->rl_tree, new); else - rangelock_enter_reader(rl, new); - } else - rangelock_enter_writer(rl, new); /* RL_WRITER or RL_APPEND */ + zfs_rangelock_enter_reader(rl, new); + } else { + /* RL_WRITER or RL_APPEND */ + zfs_rangelock_enter_writer(rl, new); + } mutex_exit(&rl->rl_lock); return (new); } @@ -484,7 +486,7 @@ rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len, * Safely free the locked_range_t. */ static void -rangelock_free(locked_range_t *lr) +zfs_rangelock_free(locked_range_t *lr) { if (lr->lr_write_wanted) cv_destroy(&lr->lr_write_cv); @@ -499,7 +501,7 @@ rangelock_free(locked_range_t *lr) * Unlock a reader lock */ static void -rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, +zfs_rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, list_t *free_list) { avl_tree_t *tree = &rl->rl_tree; @@ -561,7 +563,7 @@ rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove, * Unlock range and destroy range lock structure. */ void -rangelock_exit(locked_range_t *lr) +zfs_rangelock_exit(locked_range_t *lr) { rangelock_t *rl = lr->lr_rangelock; list_t free_list; @@ -592,12 +594,12 @@ rangelock_exit(locked_range_t *lr) * lock may be shared, let rangelock_exit_reader() * release the lock and free the locked_range_t. */ - rangelock_exit_reader(rl, lr, &free_list); + zfs_rangelock_exit_reader(rl, lr, &free_list); } mutex_exit(&rl->rl_lock); while ((free_lr = list_remove_head(&free_list)) != NULL) - rangelock_free(free_lr); + zfs_rangelock_free(free_lr); list_destroy(&free_list); } @@ -608,7 +610,7 @@ rangelock_exit(locked_range_t *lr) * entry in the tree. 
*/ void -rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) +zfs_rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) { rangelock_t *rl = lr->lr_rangelock; @@ -631,9 +633,9 @@ rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len) } #if defined(_KERNEL) -EXPORT_SYMBOL(rangelock_init); -EXPORT_SYMBOL(rangelock_fini); -EXPORT_SYMBOL(rangelock_enter); -EXPORT_SYMBOL(rangelock_exit); -EXPORT_SYMBOL(rangelock_reduce); +EXPORT_SYMBOL(zfs_rangelock_init); +EXPORT_SYMBOL(zfs_rangelock_fini); +EXPORT_SYMBOL(zfs_rangelock_enter); +EXPORT_SYMBOL(zfs_rangelock_exit); +EXPORT_SYMBOL(zfs_rangelock_reduce); #endif diff --git a/module/zfs/zfs_sysfs.c b/module/zfs/zfs_sysfs.c index 30b5edb01e18..bb7f3b69a662 100644 --- a/module/zfs/zfs_sysfs.c +++ b/module/zfs/zfs_sysfs.c @@ -144,6 +144,10 @@ zfs_kobj_release(struct kobject *kobj) zkobj->zko_attr_count = 0; } +#ifndef sysfs_attr_init +#define sysfs_attr_init(attr) do {} while (0) +#endif + static void zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) { @@ -154,6 +158,7 @@ zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) zkobj->zko_attr_list[attr_num].name = attr_name; zkobj->zko_attr_list[attr_num].mode = 0444; zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num]; + sysfs_attr_init(&zkobj->zko_attr_list[attr_num]); } static int @@ -259,6 +264,7 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, char *buf, size_t buflen) { const char *show_str; + char number[32]; /* For dataset properties list the dataset types that apply */ if (strcmp(attr_name, "datasets") == 0 && @@ -286,8 +292,6 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, } else if (strcmp(attr_name, "values") == 0) { show_str = property->pd_values ? property->pd_values : ""; } else if (strcmp(attr_name, "default") == 0) { - char number[32]; - switch (property->pd_proptype) { case PROP_TYPE_NUMBER: (void) snprintf(number, sizeof (number), "%llu", diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 371c412f6beb..0e14cadac5e9 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1476,7 +1476,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) * "preferred" size. */ - /* Round up so we never have a filesytem using 0 blocks. */ + /* Round up so we never have a filesystem using 0 blocks. */ refdbytes = P2ROUNDUP(refdbytes, statp->f_bsize); statp->f_blocks = (refdbytes + availbytes) >> bshift; statp->f_bfree = availbytes >> bshift; @@ -1736,7 +1736,12 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * - * Release all holds on dbufs. + * Release all holds on dbufs. We also grab an extra reference to all + * the remaining inodes so that the kernel does not attempt to free + * any inodes of a suspended fs. This can cause deadlocks since the + * zfs_resume_fs() process may involve starting threads, which might + * attempt to free unreferenced inodes to free up memory for the new + * thread. 
*/ if (!unmounting) { mutex_enter(&zfsvfs->z_znodes_lock); @@ -1744,6 +1749,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) zp = list_next(&zfsvfs->z_all_znodes, zp)) { if (zp->z_sa_hdl) zfs_znode_dmu_fini(zp); + if (igrab(ZTOI(zp)) != NULL) + zp->z_suspended = B_TRUE; + } mutex_exit(&zfsvfs->z_znodes_lock); } @@ -1777,8 +1785,17 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * Evict cached data. We must write out any dirty data before * disowning the dataset. */ - if (!zfs_is_readonly(zfsvfs)) + objset_t *os = zfsvfs->z_os; + boolean_t os_dirty = B_FALSE; + for (int t = 0; t < TXG_SIZE; t++) { + if (dmu_objset_is_dirty(os, t)) { + os_dirty = B_TRUE; + break; + } + } + if (!zfs_is_readonly(zfsvfs) && os_dirty) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + } dmu_objset_evict_dbufs(zfsvfs->z_os); return (0); @@ -2192,6 +2209,12 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } + + /* see comment in zfs_suspend_fs() */ + if (zp->z_suspended) { + zfs_iput_async(ZTOI(zp)); + zp->z_suspended = B_FALSE; + } } mutex_exit(&zfsvfs->z_znodes_lock); @@ -2373,7 +2396,7 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) } /* - * Return true if the coresponding vfs's unmounted flag is set. + * Return true if the corresponding vfs's unmounted flag is set. * Otherwise return false. * If this function returns true we know VFS unmount has been initiated. */ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 41c1bd255753..03a8c4a50b04 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -485,7 +485,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) /* * Lock the range against changes. */ - locked_range_t *lr = rangelock_enter(&zp->z_rangelock, + locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, uio->uio_loffset, uio->uio_resid, RL_READER); /* @@ -558,7 +558,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); task_io_account_read(nread); out: - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (error); @@ -672,7 +672,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ - lr = rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); woff = lr->lr_offset; if (lr->lr_length == UINT64_MAX) { /* @@ -689,11 +689,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * this write, then this range lock will lock the entire file * so that we can re-write the block safely. 
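Editor's note: the teardown hunk above stops calling txg_wait_synced() unconditionally and first scans all TXG_SIZE in-flight transaction groups for dirty state, so read-mostly unmounts and suspends skip a full pool sync. The check reduces to a small loop; dmu_objset_is_dirty() is modeled as a flag array here:

    #include <stdbool.h>
    #include <stdio.h>

    #define TXG_SIZE 4    /* ring of in-flight transaction groups */

    static bool dirty[TXG_SIZE];

    static bool
    objset_is_dirty_anywhere(void)
    {
        for (int t = 0; t < TXG_SIZE; t++)
            if (dirty[t])
                return (true);
        return (false);
    }

    int
    main(void)
    {
        /* Only pay for a sync when some txg has dirty data. */
        if (objset_is_dirty_anywhere())
            printf("txg_wait_synced()\n");
        else
            printf("skip sync; nothing dirty\n");

        dirty[2] = true;
        if (objset_is_dirty_anywhere())
            printf("txg_wait_synced()\n");
        return (0);
    }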
*/ - lr = rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); } if (woff >= limit) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (SET_ERROR(EFBIG)); } @@ -775,7 +775,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) */ dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); + DB_DNODE_ENTER(db); + dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, + MIN(n, max_blksz)); + DB_DNODE_EXIT(db); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { @@ -807,7 +811,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) new_blksz = MIN(end_size, max_blksz); } zfs_grow_blocksize(zp, new_blksz, tx); - rangelock_reduce(lr, woff, n); + zfs_rangelock_reduce(lr, woff, n); } /* @@ -885,7 +889,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * Clear Set-UID/Set-GID bits on successful write if not * privileged and at least one of the execute bits is set. * - * It would be nice to to this after all writes have + * It would be nice to do this after all writes have * been done, but that would still expose the ISUID/ISGID * to another app after the partial write is committed. * @@ -946,7 +950,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) } zfs_inode_update(zp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); /* * If we're in replay mode, or we made no progress, return error. @@ -999,7 +1003,7 @@ zfs_get_done(zgd_t *zgd, int error) if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); /* * Release the vnode asynchronously as we currently have the @@ -1048,7 +1052,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) return (SET_ERROR(ENOENT)); } - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; zgd->zgd_private = zp; @@ -1060,7 +1064,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = rangelock_enter(&zp->z_rangelock, + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { @@ -1082,12 +1086,12 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) size = zp->z_blksz; blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; offset -= blkoff; - zgd->zgd_lr = rangelock_enter(&zp->z_rangelock, + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, size, RL_READER); if (zp->z_blksz == size) break; offset += blkoff; - rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) @@ -1651,7 +1655,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, zfs_fuid_sync(zfsvfs, tx); /* Add to unlinked set */ - zp->z_unlinked = 1; + zp->z_unlinked = B_TRUE; zfs_unlinked_add(zp, tx); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); @@ -1676,6 +1680,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, * IN: dip - inode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. 
+ * flags - case flags. * * RETURN: 0 if success * error code if failure @@ -1849,7 +1854,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) if (xattr_obj_unlinked) { ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2); mutex_enter(&xzp->z_lock); - xzp->z_unlinked = 1; + xzp->z_unlinked = B_TRUE; clear_nlink(ZTOI(xzp)); links = 0; error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), @@ -1881,7 +1886,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) txtype = TX_REMOVE; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, obj); + zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); dmu_tx_commit(tx); out: @@ -1917,6 +1922,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. + * flags - case flags. * vsecp - ACL to be set * * OUT: ipp - inode of created directory. @@ -2213,7 +2219,8 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, uint64_t txtype = TX_RMDIR; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); + zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT, + B_FALSE); } dmu_tx_commit(tx); @@ -2235,13 +2242,12 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, } /* - * Read as many directory entries as will fit into the provided - * dirent buffer from the given directory cursor position. + * Read directory entries from the given directory cursor position and emit + * name and position for each entry. * * IN: ip - inode of directory to read. - * dirent - buffer for directory entries. - * - * OUT: dirent - filler buffer of directory entries. + * ctx - directory entry context. + * cr - credentials of caller. * * RETURN: 0 if success * error code if failure @@ -3401,7 +3407,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) } if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; ZFS_TIME_ENCODE(&ip->i_atime, atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, sizeof (atime)); @@ -4006,13 +4012,14 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, * Insert the indicated symbolic reference entry into the directory. * * IN: dip - Directory to contain new symbolic link. - * link - Name for new symlink entry. + * name - Name of directory entry in dip. * vap - Attributes of new entry. - * target - Target path of new symlink. - * + * link - Name for new symlink entry. * cr - credentials of caller. * flags - case flags * + * OUT: ipp - Inode for new symbolic link. + * * RETURN: 0 on success, error code on failure. * * Timestamps: @@ -4216,6 +4223,7 @@ zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) * sip - inode of new entry. * name - name of new entry. * cr - credentials of caller. + * flags - case flags. * * RETURN: 0 if success * error code if failure @@ -4363,14 +4371,14 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, } /* unmark z_unlinked so zfs_link_create will not reject */ if (is_tmpfile) - szp->z_unlinked = 0; + szp->z_unlinked = B_FALSE; error = zfs_link_create(dl, szp, tx, 0); if (error == 0) { uint64_t txtype = TX_LINK; /* * tmpfile is created to be in z_unlinkedobj, so remove it. - * Also, we don't log in ZIL, be cause all previous file + * Also, we don't log in ZIL, because all previous file * operation on the tmpfile are ignored by ZIL. 
Instead we * always wait for txg to sync to make sure all previous * operation are sync safe. @@ -4385,7 +4393,7 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, } } else if (is_tmpfile) { /* restore z_unlinked since when linking failed */ - szp->z_unlinked = 1; + szp->z_unlinked = B_TRUE; } txg = dmu_tx_get_txg(tx); dmu_tx_commit(tx); @@ -4509,14 +4517,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) redirty_page_for_writepage(wbc, pp); unlock_page(pp); - locked_range_t *lr = rangelock_enter(&zp->z_rangelock, + locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, pgoff, pglen, RL_WRITER); lock_page(pp); /* Page mapping changed or it was no longer dirty, we're done */ if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) { unlock_page(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (0); } @@ -4524,10 +4532,12 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) /* Another process started write block if required */ if (PageWriteback(pp)) { unlock_page(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(pp); + if (wbc->sync_mode != WB_SYNC_NONE) { + if (PageWriteback(pp)) + wait_on_page_bit(pp, PG_writeback); + } ZFS_EXIT(zfsvfs); return (0); @@ -4536,7 +4546,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) /* Clear the dirty flag the required locks are held */ if (!clear_page_dirty_for_io(pp)) { unlock_page(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (0); } @@ -4563,7 +4573,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) __set_page_dirty_nobuffers(pp); ClearPageError(pp); end_page_writeback(pp); - rangelock_exit(lr); + zfs_rangelock_exit(lr); ZFS_EXIT(zfsvfs); return (err); } @@ -4581,7 +4591,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) /* Preserve the mtime and ctime provided by the inode */ ZFS_TIME_ENCODE(&ip->i_mtime, mtime); ZFS_TIME_ENCODE(&ip->i_ctime, ctime); - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; zp->z_seq++; err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); @@ -4590,7 +4600,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) zfs_putpage_commit_cb, pp); dmu_tx_commit(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); if (wbc->sync_mode != WB_SYNC_NONE) { /* @@ -4628,14 +4638,14 @@ zfs_dirty_inode(struct inode *ip, int flags) #ifdef I_DIRTY_TIME /* - * This is the lazytime semantic indroduced in Linux 4.0 + * This is the lazytime semantic introduced in Linux 4.0 * This flag will only be called from update_time when lazytime is set. * (Note, I_DIRTY_SYNC will also set if not lazytime) * Fortunately mtime and ctime are managed within ZFS itself, so we * only need to dirty atime. 
*/ if (flags == I_DIRTY_TIME) { - zp->z_atime_dirty = 1; + zp->z_atime_dirty = B_TRUE; goto out; } #endif @@ -4652,7 +4662,7 @@ zfs_dirty_inode(struct inode *ip, int flags) } mutex_enter(&zp->z_lock); - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); @@ -4697,7 +4707,7 @@ zfs_inactive(struct inode *ip) return; } - if (zp->z_atime_dirty && zp->z_unlinked == 0) { + if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) { dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); @@ -4710,7 +4720,7 @@ zfs_inactive(struct inode *ip) mutex_enter(&zp->z_lock); (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), (void *)&atime, sizeof (atime), tx); - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; mutex_exit(&zp->z_lock); dmu_tx_commit(tx); } @@ -4727,7 +4737,6 @@ zfs_inactive(struct inode *ip) * IN: ip - inode seeking within * ooff - old file offset * noffp - pointer to new file offset - * ct - caller context * * RETURN: 0 if success * EINVAL if new offset invalid @@ -5070,13 +5079,14 @@ zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) #ifdef HAVE_UIO_ZEROCOPY /* - * Tunable, both must be a power of 2. - * - * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf - * zcr_blksz_max: if set to less than the file block size, allow loaning out of - * an arcbuf for a partial block read + * The smallest read we may consider to loan out an arcbuf. + * This must be a power of 2. */ int zcr_blksz_min = (1 << 10); /* 1K */ +/* + * If set to less than the file block size, allow loaning out of an + * arcbuf for a partial block read. This must be a power of 2. 
+ */ int zcr_blksz_max = (1 << 17); /* 128K */ /*ARGSUSED*/ @@ -5255,9 +5265,11 @@ EXPORT_SYMBOL(zfs_putpage); EXPORT_SYMBOL(zfs_dirty_inode); EXPORT_SYMBOL(zfs_map); -/* CSTYLED */ +/* BEGIN CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); -module_param(zfs_read_chunk_size, long, 0644); +module_param(zfs_read_chunk_size, ulong, 0644); MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); +/* END CSTYLED */ + #endif diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 77eb8bb9126f..59b7cd3c81d3 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -129,13 +129,13 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); - rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); + zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); zp->z_dirlocks = NULL; zp->z_acl_cached = NULL; zp->z_xattr_cached = NULL; zp->z_xattr_parent = 0; - zp->z_moved = 0; + zp->z_moved = B_FALSE; return (0); } @@ -151,7 +151,7 @@ zfs_znode_cache_destructor(void *buf, void *arg) rw_destroy(&zp->z_name_lock); mutex_destroy(&zp->z_acl_lock); rw_destroy(&zp->z_xattr_lock); - rangelock_fini(&zp->z_rangelock); + zfs_rangelock_fini(&zp->z_rangelock); ASSERT(zp->z_dirlocks == NULL); ASSERT(zp->z_acl_cached == NULL); @@ -515,7 +515,7 @@ zfs_inode_update(znode_t *zp) */ static znode_t * zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, - dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl) + dmu_object_type_t obj_type, sa_handle_t *hdl) { znode_t *zp; struct inode *ip; @@ -539,18 +539,18 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT(zp->z_dirlocks == NULL); ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); - zp->z_moved = 0; + zp->z_unlinked = B_FALSE; + zp->z_atime_dirty = B_FALSE; + zp->z_moved = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_FALSE; + zp->z_is_stale = B_FALSE; zp->z_sa_hdl = NULL; - zp->z_unlinked = 0; - zp->z_atime_dirty = 0; zp->z_mapcnt = 0; zp->z_id = db->db_object; zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - zp->z_is_mapped = B_FALSE; - zp->z_is_ctldir = B_FALSE; - zp->z_is_stale = B_FALSE; zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); @@ -596,7 +596,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_ctime, ctime); - ip->i_ino = obj; + ip->i_ino = zp->z_id; zfs_inode_update(zp); zfs_inode_set_ops(zfsvfs, ip); @@ -651,12 +651,11 @@ static zfs_acl_phys_t acl_phys; * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root + * IS_TMPFILE - new object is of O_TMPFILE * IS_XATTR - new object is an attribute - * bonuslen - length of bonus buffer - * setaclp - File/Dir initial ACL - * fuidp - Tracks fuid allocation. + * acl_ids - ACL related attributes * - * OUT: zpp - allocated znode + * OUT: zpp - allocated znode (set to dzp if IS_ROOT_NODE) * */ void @@ -788,7 +787,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } /* - * No execs denied will be deterimed when zfs_mode_compute() is called. + * No execs denied will be determined when zfs_mode_compute() is called. 
*/ pflags |= acl_ids->z_aclp->z_hints & (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| @@ -911,8 +910,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * not fail retry until sufficient memory has been reclaimed. */ do { - *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, obj, - sa_hdl); + *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); } while (*zpp == NULL); VERIFY(*zpp != NULL); @@ -1095,6 +1093,10 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); /* + * If zp->z_unlinked is set, the znode is already marked + * for deletion and should not be discovered. Check this + * after checking igrab() due to fsetxattr() & O_TMPFILE. + * * If igrab() returns NULL the VFS has independently * determined the inode should be evicted and has * called iput_final() to start the eviction process. @@ -1109,18 +1111,24 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * the VFS that this inode should not be evicted. */ if (igrab(ZTOI(zp)) == NULL) { - mutex_exit(&zp->z_lock); - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zfsvfs, zh); - /* inode might need this to finish evict */ - cond_resched(); - goto again; + if (zp->z_unlinked) + err = SET_ERROR(ENOENT); + else + err = SET_ERROR(EAGAIN); + } else { + *zpp = zp; + err = 0; } - *zpp = zp; - err = 0; + mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); zfs_znode_hold_exit(zfsvfs, zh); + + if (err == EAGAIN) { + /* inode might need this to finish evict */ + cond_resched(); + goto again; + } return (err); } @@ -1135,7 +1143,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * bonus buffer. */ zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, - doi.doi_bonus_type, obj_num, NULL); + doi.doi_bonus_type, NULL); if (zp == NULL) { err = SET_ERROR(ENOENT); } else { @@ -1255,7 +1263,7 @@ zfs_rezget(znode_t *zp) ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime); - if (gen != ZTOI(zp)->i_generation) { + if ((uint32_t)gen != ZTOI(zp)->i_generation) { zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zfsvfs, zh); return (SET_ERROR(EIO)); @@ -1265,14 +1273,14 @@ zfs_rezget(znode_t *zp) zfs_set_inode_flags(zp, ZTOI(zp)); zp->z_blksz = doi.doi_data_block_size; - zp->z_atime_dirty = 0; + zp->z_atime_dirty = B_FALSE; zfs_inode_update(zp); /* * If the file has zero links, then it has been unlinked on the send * side and it must be in the received unlinked set. * We call zfs_znode_dmu_fini() now to prevent any accesses to the - * stale data and to prevent automatical removal of the file in + * stale data and to prevent automatic removal of the file in * zfs_zinactive(). The file will be removed either when it is removed * on the send side and the next incremental stream is received or * when the unlinked set gets processed. @@ -1476,13 +1484,13 @@ zfs_extend(znode_t *zp, uint64_t end) /* * We will change zp_size, lock the whole file. */ - lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. 
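Editor's note: the zfs_zget() hunk above splits the igrab() failure case in two: a znode already marked z_unlinked is reported as ENOENT, while an inode merely racing with eviction yields EAGAIN and a retry after cond_resched(). A toy driver showing the retry loop's shape; the state flags below are stand-ins for the real znode and inode fields:

    #include <stdbool.h>
    #include <stdio.h>

    #define ENOENT 2
    #define EAGAIN 11

    static bool z_unlinked;
    static bool igrab_fails;

    static int
    zget_once(void)
    {
        if (igrab_fails)
            return (z_unlinked ? ENOENT : EAGAIN);
        return (0);
    }

    int
    main(void)
    {
        int err;

        igrab_fails = true;
        /*
         * EAGAIN means the inode is mid-eviction: drop locks, yield,
         * and retry the lookup, as the 'goto again' in the hunk does.
         */
        while ((err = zget_once()) == EAGAIN) {
            printf("inode being evicted; retrying\n");
            igrab_fails = false;    /* eviction finished */
        }
        printf("zget -> %d\n", err);
        return (0);
    }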
*/ if (end <= zp->z_size) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1512,7 +1520,7 @@ zfs_extend(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } @@ -1524,7 +1532,7 @@ zfs_extend(znode_t *zp, uint64_t end) VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); - rangelock_exit(lr); + zfs_rangelock_exit(lr); dmu_tx_commit(tx); @@ -1593,13 +1601,13 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) /* * Lock the range being freed. */ - lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (off >= zp->z_size) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } @@ -1649,7 +1657,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) page_len); } } - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } @@ -1675,20 +1683,20 @@ zfs_trunc(znode_t *zp, uint64_t end) /* * We will change zp_size, lock the whole file. */ - lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end >= zp->z_size) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, DMU_OBJECT_END); if (error) { - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1698,7 +1706,7 @@ zfs_trunc(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (error); } @@ -1714,7 +1722,7 @@ zfs_trunc(znode_t *zp, uint64_t end) VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); - rangelock_exit(lr); + zfs_rangelock_exit(lr); return (0); } @@ -1885,9 +1893,9 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) vattr.va_gid = crgetgid(cr); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); - rootzp->z_moved = 0; - rootzp->z_unlinked = 0; - rootzp->z_atime_dirty = 0; + rootzp->z_unlinked = B_FALSE; + rootzp->z_atime_dirty = B_FALSE; + rootzp->z_moved = B_FALSE; rootzp->z_is_sa = USE_SA(version, os); rootzp->z_pflags = 0; diff --git a/module/zfs/zil.c b/module/zfs/zil.c index ff14a98b6b25..c4d7d6ed1dfc 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -58,7 +58,7 @@ * * In the event of a crash or power loss, the itxs contained by each * dataset's on-disk ZIL will be replayed when that dataset is first - * instantiated (e.g. if the dataset is a normal fileystem, when it is + * instantiated (e.g. if the dataset is a normal filesystem, when it is * first mounted). * * As hinted at above, there is one ZIL per dataset (both the in-memory @@ -1416,13 +1416,26 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) * aligned to 4KB) actually gets written. However, we can't always just * allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted. 
*/ -uint64_t zil_block_buckets[] = { - 4096, /* non TX_WRITE */ - 8192+4096, /* data base */ - 32*1024 + 4096, /* NFS writes */ - UINT64_MAX +struct { + uint64_t limit; + uint64_t blksz; +} zil_block_buckets[] = { + { 4096, 4096 }, /* non TX_WRITE */ + { 8192 + 4096, 8192 + 4096 }, /* database */ + { 32768 + 4096, 32768 + 4096 }, /* NFS writes */ + { 65536 + 4096, 65536 + 4096 }, /* 64KB writes */ + { 131072, 131072 }, /* < 128KB writes */ + { 131072 +4096, 65536 + 4096 }, /* 128KB writes */ + { UINT64_MAX, SPA_OLD_MAXBLOCKSIZE}, /* > 128KB writes */ }; +/* + * Maximum block size used by the ZIL. This is picked up when the ZIL is + * initialized. Otherwise this should not be used directly; see + * zl_max_block_size instead. + */ +int zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE; + /* * Start a log block write and advance to the next log block. * Calls are serialized. @@ -1497,11 +1510,9 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) * pool log space. */ zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); - for (i = 0; zil_blksz > zil_block_buckets[i]; i++) + for (i = 0; zil_blksz > zil_block_buckets[i].limit; i++) continue; - zil_blksz = zil_block_buckets[i]; - if (zil_blksz == UINT64_MAX) - zil_blksz = SPA_OLD_MAXBLOCKSIZE; + zil_blksz = MIN(zil_block_buckets[i].blksz, zilog->zl_max_block_size); zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; for (i = 0; i < ZIL_PREV_BLKS; i++) zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); @@ -1562,13 +1573,47 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) return (nlwb); } +/* + * Maximum amount of write data that can be put into single log block. + */ +uint64_t +zil_max_log_data(zilog_t *zilog) +{ + return (zilog->zl_max_block_size - + sizeof (zil_chain_t) - sizeof (lr_write_t)); +} + +/* + * Maximum amount of log space we agree to waste to reduce number of + * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%). + */ +static inline uint64_t +zil_max_waste_space(zilog_t *zilog) +{ + return (zil_max_log_data(zilog) / 8); +} + +/* + * Maximum amount of write data for WR_COPIED. For correctness, consumers + * must fall back to WR_NEED_COPY if we can't fit the entire record into one + * maximum sized log block, because each WR_COPIED record must fit in a + * single log block. For space efficiency, we want to fit two records into a + * max-sized log block. + */ +uint64_t +zil_max_copied_data(zilog_t *zilog) +{ + return ((zilog->zl_max_block_size - sizeof (zil_chain_t)) / 2 - + sizeof (lr_write_t)); +} + static lwb_t * zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) { lr_t *lrcb, *lrc; lr_write_t *lrwb, *lrw; char *lr_buf; - uint64_t dlen, dnow, lwb_sp, reclen, txg; + uint64_t dlen, dnow, lwb_sp, reclen, txg, max_log_data; ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ASSERT3P(lwb, !=, NULL); @@ -1617,15 +1662,27 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) * For WR_NEED_COPY optimize layout for minimal number of chunks. 
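Editor's note: the rewritten bucket table above pairs a size limit with the block size to allocate, and the lookup clamps the result to the per-zilog cap zl_max_block_size (seeded from the new zil_maxblocksize tunable). A userspace copy of the lookup:

    #include <stdint.h>
    #include <stdio.h>

    #define SPA_OLD_MAXBLOCKSIZE (128 * 1024)

    /* Mirrors the bucket table in the hunk above. */
    static const struct {
        uint64_t limit;
        uint64_t blksz;
    } zil_block_buckets[] = {
        { 4096,          4096 },                 /* non TX_WRITE */
        { 8192 + 4096,   8192 + 4096 },          /* database */
        { 32768 + 4096,  32768 + 4096 },         /* NFS writes */
        { 65536 + 4096,  65536 + 4096 },         /* 64KB writes */
        { 131072,        131072 },               /* < 128KB writes */
        { 131072 + 4096, 65536 + 4096 },         /* 128KB writes */
        { UINT64_MAX,    SPA_OLD_MAXBLOCKSIZE }, /* > 128KB writes */
    };

    static uint64_t
    pick_zil_blksz(uint64_t needed, uint64_t max_block_size)
    {
        int i;

        for (i = 0; needed > zil_block_buckets[i].limit; i++)
            continue;
        /* Never exceed the per-zilog cap (zl_max_block_size). */
        return (zil_block_buckets[i].blksz < max_block_size ?
            zil_block_buckets[i].blksz : max_block_size);
    }

    int
    main(void)
    {
        printf("%llu\n", (unsigned long long)pick_zil_blksz(20000,
            SPA_OLD_MAXBLOCKSIZE));    /* 36864: NFS-write bucket */
        return (0);
    }

zil_max_copied_data() then derives its WR_COPIED cap from the same block size, (zl_max_block_size - sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t), so two copied records fit per maximum-sized log block.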
*/ lwb_sp = lwb->lwb_sz - lwb->lwb_nused; + max_log_data = zil_max_log_data(zilog); if (reclen > lwb_sp || (reclen + dlen > lwb_sp && - lwb_sp < ZIL_MAX_WASTE_SPACE && (dlen % ZIL_MAX_LOG_DATA == 0 || - lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) { + lwb_sp < zil_max_waste_space(zilog) && + (dlen % max_log_data == 0 || + lwb_sp < reclen + dlen % max_log_data))) { lwb = zil_lwb_write_issue(zilog, lwb); if (lwb == NULL) return (NULL); zil_lwb_write_open(zilog, lwb); ASSERT(LWB_EMPTY(lwb)); lwb_sp = lwb->lwb_sz - lwb->lwb_nused; + + /* + * There must be enough space in the new, empty log block to + * hold reclen. For WR_COPIED, we need to fit the whole + * record in one block, and reclen is the header size + the + * data size. For WR_NEED_COPY, we can create multiple + * records, splitting the data into multiple blocks, so we + * only need to fit one word of data per block; in this case + * reclen is just the header size (no data). + */ ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp); } @@ -1824,7 +1881,7 @@ zil_aitx_compare(const void *x1, const void *x2) /* * Remove all async itx with the given oid. */ -static void +void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; @@ -1876,16 +1933,6 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) itxg_t *itxg; itxs_t *itxs, *clean = NULL; - /* - * Object ids can be re-instantiated in the next txg so - * remove any async transactions to avoid future leaks. - * This can happen if a fsync occurs on the re-instantiated - * object for a WR_INDIRECT or WR_NEED_COPY write, which gets - * the new file data and flushes a write record for the old object. - */ - if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) - zil_remove_async(zilog, itx->itx_oid); - /* * Ensure the data of a renamed file is committed before the rename. */ @@ -1961,7 +2008,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) /* * If there are any in-memory intent log transactions which have now been * synced then start up a taskq to free them. We should only do this after we - * have written out the uberblocks (i.e. txg has been comitted) so that + * have written out the uberblocks (i.e. txg has been committed) so that * don't inadvertently clean out in-memory log records that would be required * by zil_commit(). */ @@ -3124,6 +3171,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) zilog->zl_dirty_max_txg = 0; zilog->zl_last_lwb_opened = NULL; zilog->zl_last_lwb_latency = 0; + zilog->zl_max_block_size = zil_maxblocksize; mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zilog->zl_issuer_lock, NULL, MUTEX_DEFAULT, NULL); @@ -3637,5 +3685,8 @@ MODULE_PARM_DESC(zil_nocacheflush, "Disable ZIL cache flushes"); module_param(zil_slog_bulk, ulong, 0644); MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit"); + +module_param(zil_maxblocksize, int, 0644); +MODULE_PARM_DESC(zil_maxblocksize, "Limit in bytes of ZIL log block size"); /* END CSTYLED */ #endif diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 016ac07eabd9..1bd9f2e90b0a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. 
*/ @@ -96,9 +96,23 @@ int zio_slow_io_ms = (30 * MILLISEC); * * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that * regular blocks are not deferred. + * + * Starting in sync pass 8 (zfs_sync_pass_dont_compress), we disable + * compression (including of metadata). In practice, we don't have this + * many sync passes, so this has no effect. + * + * The original intent was that disabling compression would help the sync + * passes to converge. However, in practice disabling compression increases + * the average number of sync passes, because when we turn compression off, a + * lot of block's size will change and thus we have to re-allocate (not + * overwrite) them. It also increases the number of 128KB allocations (e.g. + * for indirect blocks and spacemaps) because these will not be compressed. + * The 128K allocations are especially detrimental to performance on highly + * fragmented systems, which may have very few free segments of this size, + * and may need to load new metaslabs to satisfy 128K allocations. */ int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */ -int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */ +int zfs_sync_pass_dont_compress = 8; /* don't compress starting in this pass */ int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ /* @@ -107,6 +121,11 @@ int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ */ #define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) +/* + * Enable smaller cores by excluding metadata + * allocations as well. + */ +int zio_exclude_metadata = 0; int zio_requeue_io_start_cut_in_line = 1; #ifdef ZFS_DEBUG @@ -139,7 +158,11 @@ zio_init(void) size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t p2 = size; size_t align = 0; - size_t cflags = (size > zio_buf_debug_limit) ? KMC_NODEBUG : 0; + size_t data_cflags, cflags; + + data_cflags = KMC_NODEBUG; + cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ? + KMC_NODEBUG : 0; #if defined(_ILP32) && defined(_KERNEL) /* @@ -187,7 +210,7 @@ zio_init(void) (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); zio_data_buf_cache[c] = kmem_cache_create(name, size, align, NULL, NULL, NULL, NULL, - data_alloc_arena, cflags); + data_alloc_arena, data_cflags); } } @@ -331,12 +354,6 @@ zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize, { zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); - /* - * Ensure that anyone expecting this zio to contain a linear ABD isn't - * going to get a nasty surprise when they try to access the data. 
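Editor's note: the comment added above explains why zfs_sync_pass_dont_compress moves from 5 to 8; sync rarely reaches pass 8 in practice, so compression effectively stays on and the reallocation storms that disabling it caused are avoided. The tunable's effect is just a threshold test, sketched here with placeholder values:

    #include <stdio.h>

    /* Tunable mirroring the hunk: compression stays on until pass 8. */
    static int zfs_sync_pass_dont_compress = 8;

    static const char *
    pick_compress(int spa_sync_pass, const char *requested)
    {
        /*
         * Past the cutoff pass, force compression off so late passes
         * can overwrite blocks in place.
         */
        if (spa_sync_pass >= zfs_sync_pass_dont_compress)
            return ("off");
        return (requested);
    }

    int
    main(void)
    {
        for (int pass = 1; pass <= 9; pass++)
            printf("pass %d: %s\n", pass, pick_compress(pass, "lz4"));
        return (0);
    }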
- */ - IMPLY(abd_is_linear(zio->io_abd), abd_is_linear(data)); - zt->zt_orig_abd = zio->io_abd; zt->zt_orig_size = zio->io_size; zt->zt_bufsize = bufsize; @@ -873,8 +890,8 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) return (zio_null(NULL, spa, NULL, done, private, flags)); } -void -zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) +static void +zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held) { if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) { zfs_panic_recover("blkptr at %p has invalid TYPE %llu", @@ -900,7 +917,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) } if (BP_IS_EMBEDDED(bp)) { - if (BPE_GET_ETYPE(bp) > NUM_BP_EMBEDDED_TYPES) { + if (BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES) { zfs_panic_recover("blkptr at %p has invalid ETYPE %llu", bp, (longlong_t)BPE_GET_ETYPE(bp)); } @@ -913,6 +930,10 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) if (!spa->spa_trust_config) return; + if (!config_held) + spa_config_enter(spa, SCL_VDEV, bp, RW_READER); + else + ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER)); /* * Pool-specific checks. * @@ -961,6 +982,8 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) bp, i, (longlong_t)offset); } } + if (!config_held) + spa_config_exit(spa, SCL_VDEV, bp); } boolean_t @@ -1000,7 +1023,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, data, size, size, done, private, @@ -1093,7 +1116,7 @@ void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, B_FALSE); /* * The check for EMBEDDED is a performance optimization. We @@ -1158,7 +1181,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); if (BP_IS_EMBEDDED(bp)) return (zio_null(pio, spa, NULL, NULL, NULL, 0)); @@ -2848,6 +2871,20 @@ zio_nop_write(zio_t *zio) ASSERT(bcmp(&bp->blk_prop, &bp_orig->blk_prop, sizeof (uint64_t)) == 0); + /* + * If we're overwriting a block that is currently on an + * indirect vdev, then ignore the nopwrite request and + * allow a new block to be allocated on a concrete vdev. 
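Editor's note: zfs_blkptr_verify() above now takes config_held so its vdev checks always run under SCL_VDEV; callers that already hold the config lock assert it, everyone else takes it as reader for the duration. A toy rendition of that conditional-acquisition pattern, with the lock reduced to a bool:

    #include <stdbool.h>
    #include <stdio.h>

    /* A toy lock standing in for spa_config_enter/exit(SCL_VDEV). */
    static bool scl_vdev_held;

    static void lock_enter(void) { scl_vdev_held = true; }
    static void lock_exit(void)  { scl_vdev_held = false; }

    /*
     * If the caller already holds the config lock, rely on that (the
     * kernel version ASSERTs it); otherwise take it as reader around
     * the checks.
     */
    static void
    blkptr_verify(bool config_held)
    {
        if (!config_held)
            lock_enter();
        printf("checking vdevs, lock %s\n",
            scl_vdev_held ? "held" : "NOT held");
        if (!config_held)
            lock_exit();
    }

    int
    main(void)
    {
        blkptr_verify(false);    /* function takes and drops the lock */
        lock_enter();
        blkptr_verify(true);     /* caller owns the lock */
        lock_exit();
        return (0);
    }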
+ */ + spa_config_enter(zio->io_spa, SCL_VDEV, FTAG, RW_READER); + vdev_t *tvd = vdev_lookup_top(zio->io_spa, + DVA_GET_VDEV(&bp->blk_dva[0])); + if (tvd->vdev_ops == &vdev_indirect_ops) { + spa_config_exit(zio->io_spa, SCL_VDEV, FTAG); + return (zio); + } + spa_config_exit(zio->io_spa, SCL_VDEV, FTAG); + *bp = *bp_orig; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; zio->io_flags |= ZIO_FLAG_NOPWRITE; @@ -3178,7 +3215,9 @@ zio_ddt_write(zio_t *zio) BP_ZERO(bp); } else { zp->zp_dedup = B_FALSE; + BP_SET_DEDUP(bp, B_FALSE); } + ASSERT(!BP_GET_DEDUP(bp)); zio->io_pipeline = ZIO_WRITE_PIPELINE; ddt_exit(ddt); return (zio); diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c index 7b148375d0c2..179fab5de365 100644 --- a/module/zfs/zio_checksum.c +++ b/module/zfs/zio_checksum.c @@ -308,7 +308,7 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) mutex_exit(&spa->spa_cksum_tmpls_lock); } -/* convenience function to update a checksum to accomodate an encryption MAC */ +/* convenience function to update a checksum to accommodate an encryption MAC */ static void zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor) { diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index f5cbc3e8218a..01c51347fec3 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -155,11 +155,10 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, abd_return_buf(src, tmp, s_len); /* - * Decompression shouldn't fail, because we've already verifyied + * Decompression shouldn't fail, because we've already verified * the checksum. However, for extra protection (e.g. against bitflips * in non-ECC RAM), we handle this error (and test it). */ - ASSERT0(ret); if (zio_decompress_fail_fraction != 0 && spa_get_random(zio_decompress_fail_fraction) == 0) ret = SET_ERROR(EINVAL); diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index eb781b64fa1d..7ce2b1bf4078 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -369,7 +369,7 @@ zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) /* * This function handles all encryption and decryption in zfs. When * encrypting it expects puio to reference the plaintext and cuio to - * reference the cphertext. cuio must have enough space for the + * reference the ciphertext. cuio must have enough space for the * ciphertext + room for a MAC. datalen should be the length of the * plaintext / ciphertext alone. */ @@ -549,12 +549,12 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key) { - int ret; crypto_mechanism_t mech; uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint_t enc_len, keydata_len, aad_len; + int ret; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); @@ -934,7 +934,7 @@ zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) /* * At L0 we want to verify these fields to ensure that data blocks - * can not be reinterpretted. For instance, we do not want an attacker + * can not be reinterpreted. For instance, we do not want an attacker * to trick us into returning raw lz4 compressed data to the user * by modifying the compression bits. 
At higher levels, we cannot * enforce this policy since raw sends do not convey any information diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 78896d3dc38b..d8af503bdfc2 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -113,7 +113,7 @@ freq_triggered(uint32_t frequency) return (B_TRUE); /* - * Note: we still handle legacy (unscaled) frequecy values + * Note: we still handle legacy (unscaled) frequency values */ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 3f3b2e2dc53c..5660f8b0e56f 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -226,6 +226,12 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); + /* + * The VFS does not apply the umask, therefore it is applied here + * when POSIX ACLs are not enabled. + */ + if (!IS_POSIXACL(dir)) + mode &= ~current_umask(); zpl_vap_init(vap, dir, mode, cr); cookie = spl_fstrans_mark(); diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c index 216c79401526..810ab28988a7 100644 --- a/module/zfs/zpl_super.c +++ b/module/zfs/zpl_super.c @@ -297,7 +297,7 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) * The dsl pool lock must be released prior to calling sget(). * It is possible sget() may block on the lock in grab_super() * while deactivate_super() holds that same lock and waits for - * a txg sync. If the dsl_pool lock is held over over sget() + * a txg sync. If the dsl_pool lock is held over sget() * this can prevent the pool sync and cause a deadlock. */ dsl_pool_rele(dmu_objset_pool(os), FTAG); diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index 8ee6e9a97f0a..95523f28e3b4 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -1130,12 +1130,9 @@ zpl_init_acl(struct inode *ip, struct inode *dir) return (0); if (!S_ISLNK(ip->i_mode)) { - if (ITOZSB(ip)->z_acl_type == ZFS_ACLTYPE_POSIXACL) { - acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return (PTR_ERR(acl)); - } - + acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return (PTR_ERR(acl)); if (!acl) { ip->i_mode &= ~current_umask(); ip->i_ctime = current_time(ip); @@ -1144,7 +1141,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir) } } - if ((ITOZSB(ip)->z_acl_type == ZFS_ACLTYPE_POSIXACL) && acl) { + if (acl) { umode_t mode; if (S_ISDIR(ip->i_mode)) { diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index c29f65f676b9..93719dcca5fd 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -684,7 +684,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, itx_wr_state_t wr_state = write_state; ssize_t len = size; - if (wr_state == WR_COPIED && size > ZIL_MAX_COPIED_DATA) + if (wr_state == WR_COPIED && size > zil_max_copied_data(zilog)) wr_state = WR_NEED_COPY; else if (wr_state == WR_INDIRECT) len = MIN(blocksize - P2PHASE(offset, blocksize), size); @@ -783,7 +783,7 @@ zvol_write(void *arg) if (error) break; } - rangelock_exit(zvr->lr); + zfs_rangelock_exit(zvr->lr); int64_t nwritten = start_resid - uio.uio_resid; dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); @@ -878,7 +878,7 @@ zvol_discard(void *arg) ZVOL_OBJ, start, size); } unlock: - rangelock_exit(zvr->lr); + zfs_rangelock_exit(zvr->lr); if (error == 0 && sync) zil_commit(zv->zv_zilog, ZVOL_OBJ); @@ -924,7 +924,7 @@ zvol_read(void *arg) break; } } - rangelock_exit(zvr->lr); + 
zfs_rangelock_exit(zvr->lr); int64_t nread = start_resid - uio.uio_resid; dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); @@ -944,7 +944,7 @@ zvol_get_done(zgd_t *zgd, int error) if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); kmem_free(zgd, sizeof (zgd_t)); } @@ -977,8 +977,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = rangelock_enter(&zv->zv_rangelock, offset, size, - RL_READER); + zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, + size, RL_READER); error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf, DMU_READ_NO_PREFETCH); } else { /* indirect write */ @@ -990,8 +990,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) */ size = zv->zv_volblocksize; offset = P2ALIGN_TYPED(offset, size, uint64_t); - zgd->zgd_lr = rangelock_enter(&zv->zv_rangelock, offset, size, - RL_READER); + zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, + size, RL_READER); error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { @@ -1089,7 +1089,7 @@ zvol_request(struct request_queue *q, struct bio *bio) * are asynchronous, we take it here synchronously to make * sure overlapped I/Os are properly ordered. */ - zvr->lr = rangelock_enter(&zv->zv_rangelock, offset, size, + zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size, RL_WRITER); /* * Sync writes and discards execute zil_commit() which may need @@ -1128,7 +1128,7 @@ zvol_request(struct request_queue *q, struct bio *bio) rw_enter(&zv->zv_suspend_lock, RW_READER); - zvr->lr = rangelock_enter(&zv->zv_rangelock, offset, size, + zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size, RL_READER); if (zvol_request_sync || taskq_dispatch(zvol_taskq, zvol_read, zvr, TQ_SLEEP) == TASKQID_INVALID) @@ -1725,7 +1725,7 @@ zvol_alloc(dev_t dev, const char *name) zv->zv_open_count = 0; strlcpy(zv->zv_name, name, MAXNAMELEN); - rangelock_init(&zv->zv_rangelock, NULL, NULL); + zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL); rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL); zv->zv_disk->major = zvol_major; @@ -1783,7 +1783,7 @@ zvol_free(void *arg) ASSERT(zv->zv_disk->private_data == NULL); rw_destroy(&zv->zv_suspend_lock); - rangelock_fini(&zv->zv_rangelock); + zfs_rangelock_fini(&zv->zv_rangelock); del_gendisk(zv->zv_disk); blk_cleanup_queue(zv->zv_queue); @@ -1876,6 +1876,10 @@ zvol_create_minor_impl(const char *name) #ifdef QUEUE_FLAG_ADD_RANDOM blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue); #endif + /* This flag was introduced in kernel version 4.12. */ +#ifdef QUEUE_FLAG_SCSI_PASSTHROUGH + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_queue); +#endif if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) @@ -1993,7 +1997,7 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg) /* at this point, the dsname should name a snapshot */ if (strchr(dsname, '@') == 0) { dprintf("zvol_create_snap_minor_cb(): " - "%s is not a shapshot name\n", dsname); + "%s is not a snapshot name\n", dsname); } else { minors_job_t *job; char *n = strdup(dsname); diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in index 568bef988ca0..d87293686422 100644 --- a/rpm/generic/zfs-dkms.spec.in +++ b/rpm/generic/zfs-dkms.spec.in @@ -73,7 +73,7 @@ exit 1 %preun # Are we doing an upgrade? 
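The %preun fix just below hinges on the rpm scriptlet argument convention: in %preun, $1 is the number of package instances that will remain after the transaction, so 0 means a final erase and 1 (or more) means an upgrade, while Debian-style maintainer scripts hand over the literal string "upgrade" instead of a count. A small sh sketch of that convention (the echo messages are placeholders):

    case "$1" in
        0) echo "final removal: tear everything down" ;;
        1|upgrade) echo "upgrade: keep state for the incoming version" ;;
        *) echo "unexpected scriptlet argument: $1" ;;
    esac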
-if [ $1 -ne 0 ] ; then +if [ "$1" = "1" -o "$1" = "upgrade" ] ; then # Yes we are. Are we upgrading to a new ZFS version? NEWEST_VER=$(dkms status zfs | sed 's/,//g' | sort -r -V | awk '/installed/{print $2; exit}') if [ "$NEWEST_VER" != "%{version}" ] ; then diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 9faa3ba771a1..545627d4bfba 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -53,10 +53,6 @@ %bcond_with asan %bcond_with systemd -# Exclude test-runner.py from the rpmbuild shebang check to allow it to run -# under Python 2 and 3. -%global __brp_mangle_shebangs_exclude_from test-runner.py - # Generic enable switch for systemd %if %{with systemd} %define _systemd 1 @@ -99,6 +95,7 @@ %define __python_cffi_pkg python%{__python_pkg_version}-cffi %define __python_setuptools_pkg python%{__python_pkg_version}-setuptools %endif +%define __python_sitelib %(%{__python} -Esc "from distutils.sysconfig import get_python_lib; print(get_python_lib())") # By default python-pyzfs is enabled, with the exception of # RHEL 6 which by default uses Python 2.6 which is too old. @@ -138,7 +135,7 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel -%if 0%{?fedora} >= 28 +%if 0%{?fedora} >= 28 || 0%{?rhel} >= 8 || 0%{?centos} >= 8 BuildRequires: libtirpc-devel %endif Requires: openssl @@ -255,7 +252,8 @@ validating the file system. %package dracut Summary: Dracut module Group: System Environment/Kernel -Requires: %{name}%{?_isa} = %{version}-%{release} +BuildArch: noarch +Requires: %{name} >= %{version} Requires: dracut Requires: /usr/bin/awk Requires: grep @@ -320,7 +318,7 @@ image which is ZFS aware. %if 0%{?_systemd} %define systemd --enable-systemd --with-systemdunitdir=%{_unitdir} --with-systemdpresetdir=%{_presetdir} --with-systemdmodulesloaddir=%{_modulesloaddir} --with-systemdgeneratordir=%{_systemdgeneratordir} --disable-sysvinit - %define systemd_svcs zfs-import-cache.service zfs-import-scan.service zfs-mount.service zfs-share.service zfs-zed.service zfs.target zfs-import.target + %define systemd_svcs zfs-import-cache.service zfs-import-scan.service zfs-mount.service zfs-share.service zfs-zed.service zfs.target zfs-import.target zfs-volume-wait.service zfs-volumes.target %else %define systemd --enable-sysvinit --disable-systemd %endif @@ -352,6 +350,14 @@ make %{?_smp_mflags} %{__rm} -rf $RPM_BUILD_ROOT make install DESTDIR=%{?buildroot} find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \; +%if 0%{!?__brp_mangle_shebangs:1} +find %{?buildroot}%{_bindir} \ + \( -name arc_summary -or -name arcstat -or -name dbufstat \) \ + -exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \; +find %{?buildroot}%{_datadir} \ + \( -name test-runner.py -or -name zts-report.py \) \ + -exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \; +%endif %post %if 0%{?_systemd} @@ -417,6 +423,7 @@ systemctl --system daemon-reload >/dev/null || true %{_sbindir}/* %{_bindir}/raidz_test %{_bindir}/zgenhostid +%{_bindir}/zvol_wait # Optional Python 2/3 scripts %{_bindir}/arc_summary %{_bindir}/arcstat @@ -430,6 +437,14 @@ systemctl --system daemon-reload >/dev/null || true %{_udevdir}/vdev_id %{_udevdir}/zvol_id %{_udevdir}/rules.d/* +%if ! 
0%{?_systemd} || 0%{?_initramfs} +# Files needed for sysvinit and initramfs-tools +%{_sysconfdir}/%{name}/zfs-functions +%config(noreplace) %{_initconfdir}/zfs +%else +%exclude %{_sysconfdir}/%{name}/zfs-functions +%exclude %{_initconfdir}/zfs +%endif %if 0%{?_systemd} %{_unitdir}/* %{_presetdir}/* @@ -437,9 +452,10 @@ systemctl --system daemon-reload >/dev/null || true %{_systemdgeneratordir}/* %else %config(noreplace) %{_sysconfdir}/init.d/* -%config(noreplace) %{_initconfdir}/zfs %endif -%config(noreplace) %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/zed.d/* +%config(noreplace) %{_sysconfdir}/%{name}/zpool.d/* +%config(noreplace) %{_sysconfdir}/%{name}/vdev_id.conf.*.example %attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/* %files -n libzpool2 @@ -455,8 +471,8 @@ systemctl --system daemon-reload >/dev/null || true %{_libdir}/libzfs*.so.* %files -n libzfs2-devel -%{_datadir}/pkgconfig/libzfs.pc -%{_datadir}/pkgconfig/libzfs_core.pc +%{_datarootdir}/pkgconfig/libzfs.pc +%{_datarootdir}/pkgconfig/libzfs_core.pc %{_libdir}/*.so %{_includedir}/* %doc AUTHORS COPYRIGHT LICENSE NOTICE README.md @@ -473,8 +489,8 @@ systemctl --system daemon-reload >/dev/null || true %doc contrib/pyzfs/README %doc contrib/pyzfs/LICENSE %defattr(-,root,root,-) -%{python_sitelib}/libzfs_core/* -%{python_sitelib}/pyzfs* +%{__python_sitelib}/libzfs_core/* +%{__python_sitelib}/pyzfs* %endif %if 0%{?_initramfs} diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in index 473f2d032509..f632c4867e63 100644 --- a/rpm/redhat/zfs-kmod.spec.in +++ b/rpm/redhat/zfs-kmod.spec.in @@ -41,6 +41,7 @@ This package contains the ZFS kernel modules. %package -n kmod-%{kmod_name}-devel Summary: ZFS kernel module(s) devel common Group: System Environment/Kernel +Provides: kmod-spl-devel = %{version} %description -n kmod-%{kmod_name}-devel This package provides the header files and objects to build kernel modules. diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 11e963c527a8..d275a41c4e04 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -60,7 +60,7 @@ all-local: -e '\|^export SBIN_DIR=|s|$$|@abs_top_builddir@/bin|' \ -e '\|^export ZTS_DIR=|s|$$|@abs_top_srcdir@/tests|' \ -e '\|^export SCRIPT_DIR=|s|$$|@abs_top_srcdir@/scripts|' \ - common.sh.in >common.sh + $(abs_top_srcdir)/scripts/common.sh.in >common.sh -echo "$$EXTRA_ENVIRONMENT" >>common.sh clean-local: @@ -71,4 +71,5 @@ install-data-hook: -e '\|^export SBIN_DIR=|s|$$|@sbindir@|' \ -e '\|^export ZTS_DIR=|s|$$|@datadir@/@PACKAGE@|' \ -e '\|^export SCRIPT_DIR=|s|$$|@datadir@/@PACKAGE@|' \ - common.sh.in >$(DESTDIR)$(datadir)/@PACKAGE@/common.sh + $(abs_top_srcdir)/scripts/common.sh.in \ + >$(DESTDIR)$(datadir)/@PACKAGE@/common.sh diff --git a/scripts/kmodtool b/scripts/kmodtool index 27a14cdac23a..b928c9286204 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -144,7 +144,13 @@ print_rpmtemplate_per_kmodpkg () local kernel_uname_r=${1} local kernel_variant="${2:+-${2}}" - # first part + # Detect depmod install location + local depmod_path=/sbin/depmod + if [ ! 
-f ${depmod_path} ]; then + depmod_path=/usr/sbin/depmod + fi + + # first part cat <= %{?epoch:%{epoch}:}%{version} -Requires(post): ${prefix}/sbin/depmod -Requires(postun): ${prefix}/sbin/depmod + +%if 0%{?rhel} == 6 || 0%{?centos} == 6 +Requires(post): module-init-tools +Requires(postun): module-init-tools +%else +Requires(post): kmod +Requires(postun): kmod +%endif EOF if [[ ${obsolete_name} ]]; then @@ -170,17 +182,17 @@ BuildRequires: kernel-devel-uname-r = ${kernel_uname_r} %{?KmodsRequires:Requires: %{KmodsRequires}-uname-r = ${kernel_uname_r}} %{?KmodsRequires:BuildRequires: %{KmodsRequires}-uname-r = ${kernel_uname_r}} %post -n kmod-${kmodname}-${kernel_uname_r} -${prefix}/sbin/depmod -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || : +${prefix}${depmod_path} -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -${prefix}/sbin/depmod -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || : +${prefix}${depmod_path} -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || : EOF else cat < /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}${depmod_path} -a > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -[[ "$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}/sbin/depmod -a > /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}${depmod_path} -a > /dev/null || : EOF fi @@ -397,7 +409,7 @@ print_rpmtemplate () # and print it and some other required stuff as macro print_rpmtemplate_header - # now print the packages itselfs + # now print the packages for kernel in ${kernel_versions_to_build_for} ; do local kernel_verrelarch=${kernel%%${kernels_known_variants}} @@ -489,7 +501,7 @@ while [ "${1}" ] ; do --obsolete-name) shift if [[ ! "${1}" ]] ; then - error_out 2 "Please provide the name of the kmod to obsolte together with --obsolete-name" >&2 + error_out 2 "Please provide the name of the kmod to obsolete together with --obsolete-name" >&2 fi obsolete_name="${1}" shift @@ -497,7 +509,7 @@ while [ "${1}" ] ; do --obsolete-version) shift if [[ ! "${1}" ]] ; then - error_out 2 "Please provide the version of the kmod to obsolte together with --obsolete-version" >&2 + error_out 2 "Please provide the version of the kmod to obsolete together with --obsolete-version" >&2 fi obsolete_version="${1}" shift diff --git a/scripts/make_gitrev.sh b/scripts/make_gitrev.sh index bab9be88d734..1cf143794b26 100755 --- a/scripts/make_gitrev.sh +++ b/scripts/make_gitrev.sh @@ -39,3 +39,7 @@ trap cleanup EXIT git rev-parse --git-dir > /dev/null 2>&1 # Get the git current git revision ZFS_GIT_REV=$(git describe --always --long --dirty 2>/dev/null) +# Check if header file already contain the exact string +grep -sq "\"${ZFS_GIT_REV}\"" "$(dirname "$0")"/../include/zfs_gitrev.h && + trap - EXIT +exit 0 diff --git a/scripts/zfs-tests.sh b/scripts/zfs-tests.sh index 7c5286ba70ff..ce766e239823 100755 --- a/scripts/zfs-tests.sh +++ b/scripts/zfs-tests.sh @@ -31,7 +31,7 @@ fi PROG=zfs-tests.sh VERBOSE="no" -QUIET= +QUIET="" CLEANUP="yes" CLEANUPALL="no" LOOPBACK="yes" @@ -307,7 +307,7 @@ while getopts 'hvqxkfScn:d:s:r:?t:T:u:I:' OPTION; do VERBOSE="yes" ;; q) - QUIET="-q" + QUIET="yes" ;; x) CLEANUPALL="yes" @@ -602,10 +602,17 @@ REPORT_FILE=$(mktemp -u -t zts-report.XXXX -p "$FILEDIR") # # Run all the tests as specified. 
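One detail worth spelling out before the invocation rewrite below: QUIET now stores a yes/empty flag rather than the option text itself, and the command line re-derives the flag with ${QUIET:+-q}. The ${var:+word} expansion yields word only when var is set and non-empty, which keeps the flag's storage separate from its spelling. A self-contained sh illustration (the runner name is made up):

    QUIET="yes"
    set -- runner ${QUIET:+-q}    # expands to: runner -q
    echo "$@"
    QUIET=""
    set -- runner ${QUIET:+-q}    # expands to: runner
    echo "$@"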
# -msg "${TEST_RUNNER} ${QUIET} -c ${RUNFILE} -T ${TAGS} -i ${STF_SUITE}" \ - "-I ${ITERATIONS}" -${TEST_RUNNER} ${QUIET} -c "${RUNFILE}" -T "${TAGS}" -i "${STF_SUITE}" \ - -I "${ITERATIONS}" 2>&1 | tee "$RESULTS_FILE" +msg "${TEST_RUNNER} ${QUIET:+-q}" \ + "-c \"${RUNFILE}\"" \ + "-T \"${TAGS}\"" \ + "-i \"${STF_SUITE}\"" \ + "-I \"${ITERATIONS}\"" +${TEST_RUNNER} ${QUIET:+-q} \ + -c "${RUNFILE}" \ + -T "${TAGS}" \ + -i "${STF_SUITE}" \ + -I "${ITERATIONS}" \ + 2>&1 | tee "$RESULTS_FILE" # # Analyze the results. diff --git a/tests/README.md b/tests/README.md index 7b3768c29110..b2c7f99c7098 100644 --- a/tests/README.md +++ b/tests/README.md @@ -78,7 +78,7 @@ The following zfs-tests.sh options are supported: when test-runner exists. This is useful when the results of a specific test need to be preserved for further analysis. - -f Use sparse files directly instread of loopback devices for + -f Use sparse files directly instead of loopback devices for the testing. When running in this mode certain tests will be skipped which depend on real block devices. diff --git a/tests/runfiles/Makefile.am b/tests/runfiles/Makefile.am index 138d905a5722..4625806ff8ba 100644 --- a/tests/runfiles/Makefile.am +++ b/tests/runfiles/Makefile.am @@ -1,2 +1,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/runfiles -dist_pkgdata_DATA = *.run +dist_pkgdata_DATA = \ + linux.run \ + longevity.run \ + perf-regression.run diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8219cf42b101..ae15cd221784 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -33,7 +33,8 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', tags = ['functional', 'alloc_class'] [tests/functional/arc] -tests = ['dbufstats_001_pos', 'dbufstats_002_pos'] +tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos', + 'arcstats_runtime_tuning'] tags = ['functional', 'arc'] [tests/functional/atime] @@ -87,7 +88,7 @@ tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit', 'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict', 'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult', 'tst.rollback_one', 'tst.snapshot_destroy', 'tst.snapshot_neg', - 'tst.snapshot_recursive', 'tst.snapshot_simple'] + 'tst.snapshot_recursive', 'tst.snapshot_simple', 'tst.terminate_by_signal'] tags = ['functional', 'channel_program', 'synctask_core'] [tests/functional/chattr] @@ -106,7 +107,7 @@ tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_001_neg', 'zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', - 'zdb_005_pos', 'zdb_006_pos'] + 'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum', 'zdb_decompress'] pre = post = tags = ['functional', 'cli_root', 'zdb'] @@ -122,7 +123,7 @@ tags = ['functional', 'cli_root', 'zfs_bookmark'] [tests/functional/cli_root/zfs_change-key] tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', - 'zfs_change-key_pbkdf2iters'] + 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones'] tags = ['functional', 'cli_root', 'zfs_change-key'] [tests/functional/cli_root/zfs_clone] @@ -182,7 +183,8 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', - 'zfs_multi_mount', 'zfs_mount_all_fail', 
'zfs_mount_all_mountpoints'] + 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', + 'zfs_mount_test_race'] tags = ['functional', 'cli_root', 'zfs_mount'] [tests/functional/cli_root/zfs_program] @@ -266,8 +268,8 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', tags = ['functional', 'cli_root', 'zfs_snapshot'] [tests/functional/cli_root/zfs_sysfs] -tests = ['zfeature_set_unsupported.ksh', 'zfs_get_unsupported', - 'zfs_set_unsupported', 'zfs_sysfs_live.ksh', 'zpool_get_unsupported', +tests = ['zfeature_set_unsupported', 'zfs_get_unsupported', + 'zfs_set_unsupported', 'zfs_sysfs_live', 'zpool_get_unsupported', 'zpool_set_unsupported'] tags = ['functional', 'cli_root', 'zfs_sysfs'] @@ -360,7 +362,7 @@ tags = ['functional', 'cli_root', 'zpool_export'] [tests/functional/cli_root/zpool_get] tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos', - 'zpool_get_004_neg'] + 'zpool_get_004_neg', 'zpool_get_005_pos'] tags = ['functional', 'cli_root', 'zpool_get'] [tests/functional/cli_root/zpool_history] @@ -461,10 +463,7 @@ tests = ['zpool_split_cliargs', 'zpool_split_devices', tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] -tests = ['zpool_status_001_pos', 'zpool_status_002_pos','zpool_status_003_pos', - 'zpool_status_-c_disable', 'zpool_status_-c_homedir', - 'zpool_status_-c_searchpath'] -user = +tests = ['zpool_status_001_pos', 'zpool_status_002_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] @@ -505,7 +504,7 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', - 'arc_summary_001_pos', 'arc_summary_002_neg', 'dbufstat_001_pos'] + 'arc_summary_001_pos', 'arc_summary_002_neg'] user = tags = ['functional', 'cli_user', 'misc'] @@ -528,6 +527,12 @@ tests = ['zpool_list_001_pos', 'zpool_list_002_neg'] user = tags = ['functional', 'cli_user', 'zpool_list'] +[tests/functional/cli_user/zpool_status] +tests = ['zpool_status_003_pos', 'zpool_status_-c_disable', + 'zpool_status_-c_homedir', 'zpool_status_-c_searchpath'] +user = +tags = ['functional', 'cli_user', 'zpool_status'] + [tests/functional/compression] tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', 'compress_004_pos'] @@ -635,7 +640,7 @@ tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count', tags = ['functional', 'limits'] [tests/functional/link_count] -tests = ['link_count_001'] +tests = ['link_count_001', 'link_count_root_inode'] tags = ['functional', 'link_count'] [tests/functional/migration] @@ -653,7 +658,7 @@ tags = ['functional', 'mmap'] tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', 'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history', - 'mmp_on_zdb', 'mmp_write_distribution'] + 'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid'] tags = ['functional', 'mmp'] [tests/functional/mount] @@ -746,19 +751,21 @@ tags = ['functional', 'redundancy'] [tests/functional/refquota] tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos', - 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg'] + 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg', + 'refquota_007_neg', 'refquota_008_neg'] tags = ['functional', 'refquota'] 
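Every runfile stanza in this file follows the same shape: a section header naming a test directory, a tests list of scripts in that directory, and tags used for selection, with a [DEFAULT] section at the top supplying shared options. A sketch of a minimal private runfile; the file name, the timeout value, and the outputdir option are illustrative assumptions:

    [DEFAULT]
    timeout = 600
    outputdir = /var/tmp/test_results

    [tests/functional/refquota]
    tests = ['refquota_001_pos']
    tags = ['functional', 'refquota']

test-runner consumes such a file via -c, as the zfs-tests.sh invocation earlier shows, so saving this as custom.run and running ./test-runner.py -c custom.run from a built tree would exercise just that one group (again, an assumed invocation).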
[tests/functional/refreserv] tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos', - 'refreserv_004_pos', 'refreserv_005_pos'] + 'refreserv_004_pos', 'refreserv_005_pos', 'refreserv_multi_raidz', + 'refreserv_raidz'] tags = ['functional', 'refreserv'] [tests/functional/removal] pre = -tests = ['removal_all_vdev', 'removal_check_space', +tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space', 'removal_condense_export', 'removal_multiple_indirection', - 'removal_remap', 'removal_remap_deadlists', + 'removal_remap', 'removal_nopwrite', 'removal_remap_deadlists', 'removal_resume_export', 'removal_sanity', 'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup', 'removal_with_errors', 'removal_with_export', @@ -766,7 +773,8 @@ tests = ['removal_all_vdev', 'removal_check_space', 'removal_with_remove', 'removal_with_scrub', 'removal_with_send', 'removal_with_send_recv', 'removal_with_snapshot', 'removal_with_write', 'removal_with_zdb', 'remove_expanded', - 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz'] + 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz', + 'remove_indirect'] tags = ['functional', 'removal'] [tests/functional/rename_dirs] @@ -788,6 +796,10 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', 'reservation_022_pos'] tags = ['functional', 'reservation'] +[tests/functional/resilver] +tests = ['resilver_restart_001'] +tags = ['functional', 'resilver'] + [tests/functional/rootpool] tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos'] tags = ['functional', 'rootpool'] @@ -820,8 +832,8 @@ tags = ['functional', 'scrub_mirror'] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs', - 'slog_replay_volume'] + 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001', + 'slog_replay_fs_002', 'slog_replay_volume'] tags = ['functional', 'slog'] [tests/functional/snapshot] @@ -843,12 +855,18 @@ tags = ['functional', 'snapused'] tests = ['sparse_001_pos'] tags = ['functional', 'sparse'] +[tests/functional/suid] +tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid', + 'suid_write_to_none'] +tags = ['functional', 'suid'] + [tests/functional/threadsappend] tests = ['threadsappend_001_pos'] tags = ['functional', 'threadsappend'] [tests/functional/tmpfile] -tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos'] +tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos', + 'tmpfile_stat_mode'] tags = ['functional', 'tmpfile'] [tests/functional/trim] diff --git a/tests/test-runner/bin/Makefile.am b/tests/test-runner/bin/Makefile.am index 30c564e55533..2c031f745503 100644 --- a/tests/test-runner/bin/Makefile.am +++ b/tests/test-runner/bin/Makefile.am @@ -3,13 +3,13 @@ dist_pkgdata_SCRIPTS = \ test-runner.py \ zts-report.py # -# These scripts are compatibile with both Python 2.6 and 3.4. As such the +# These scripts are compatible with both Python 2.6 and 3.4. As such the # python 3 shebang can be replaced at install time when targeting a python # 2 system. This allows us to maintain a single version of the source. 
# if USING_PYTHON_2 install-data-hook: - sed --in-place 's|^#!/usr/bin/python3|#!/usr/bin/python2|' \ + sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \ $(DESTDIR)$(pkgdatadir)/test-runner.py \ $(DESTDIR)$(pkgdatadir)/zts-report.py endif diff --git a/tests/test-runner/bin/test-runner.py b/tests/test-runner/bin/test-runner.py index ea37e8ab6f88..ca08b3754115 100755 --- a/tests/test-runner/bin/test-runner.py +++ b/tests/test-runner/bin/test-runner.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # # This file and its contents are supplied under the terms of the @@ -307,7 +307,7 @@ def log(self, options): This function is responsible for writing all output. This includes the console output, the logfile of all results (with timestamped merged stdout and stderr), and for each test, the unmodified - stdout/stderr/merged in it's own file. + stdout/stderr/merged in its own file. """ logname = getpwuid(os.getuid()).pw_name @@ -716,7 +716,7 @@ def complete_outputdirs(self): def setup_logging(self, options): """ - This funtion creates the output directory and gets a file object + This function creates the output directory and gets a file object for the logfile. This function must be called before write_log() can be used. """ diff --git a/tests/test-runner/bin/zts-report.py b/tests/test-runner/bin/zts-report.py index d046c13a55ef..600079fbee81 100755 --- a/tests/test-runner/bin/zts-report.py +++ b/tests/test-runner/bin/zts-report.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # This file and its contents are supplied under the terms of the @@ -160,17 +160,9 @@ # reasons listed above can be used. # known = { - 'casenorm/sensitive_none_lookup': ['FAIL', '7633'], - 'casenorm/sensitive_none_delete': ['FAIL', '7633'], 'casenorm/sensitive_formd_lookup': ['FAIL', '7633'], 'casenorm/sensitive_formd_delete': ['FAIL', '7633'], - 'casenorm/insensitive_none_lookup': ['FAIL', '7633'], - 'casenorm/insensitive_none_delete': ['FAIL', '7633'], - 'casenorm/insensitive_formd_lookup': ['FAIL', '7633'], - 'casenorm/insensitive_formd_delete': ['FAIL', '7633'], - 'casenorm/mixed_none_lookup': ['FAIL', '7633'], 'casenorm/mixed_none_lookup_ci': ['FAIL', '7633'], - 'casenorm/mixed_none_delete': ['FAIL', '7633'], 'casenorm/mixed_formd_lookup': ['FAIL', '7633'], 'casenorm/mixed_formd_lookup_ci': ['FAIL', '7633'], 'casenorm/mixed_formd_delete': ['FAIL', '7633'], diff --git a/tests/test-runner/include/logapi.shlib b/tests/test-runner/include/logapi.shlib index 32fc00616180..cd7982a94a0b 100644 --- a/tests/test-runner/include/logapi.shlib +++ b/tests/test-runner/include/logapi.shlib @@ -198,12 +198,12 @@ function log_neg_expect elif (( $status == 127 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (File not found)" - # bus error - core dump - elif (( $status == 138 )); then + # bus error - core dump (256+signal, SIGBUS=7) + elif (( $status == 263 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (Bus Error)" - # segmentation violation - core dump - elif (( $status == 139 )); then + # segmentation violation - core dump (256+signal, SIGSEGV=11) + elif (( $status == 267 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (SEGV)" else diff --git a/tests/test-runner/man/test-runner.1 b/tests/test-runner/man/test-runner.1 index 31cd412452b8..95255073b705 100644 --- a/tests/test-runner/man/test-runner.1 +++ b/tests/test-runner/man/test-runner.1 @@ -103,7 +103,7 @@ The file has one section named "DEFAULT," which contains 
configuration option names and their values in "name = value" format. The values in this section apply to all the subsequent sections, unless they are also specified there, in which case the default is overridden. The remaining section names are the -absolute pathnames of files and direcotries, describing tests and test groups +absolute pathnames of files and directories, describing tests and test groups respectively. The legal option names are: .sp .ne 2 @@ -248,7 +248,7 @@ Run \fIscript\fR after any test or test group. \fB-q\fR .ad .RS 6n -Print only the results sumary to the standard output. +Print only the results summary to the standard output. .RE .ne 2 diff --git a/tests/zfs-tests/cmd/btree_test/.gitignore b/tests/zfs-tests/cmd/btree_test/.gitignore new file mode 100644 index 000000000000..73777c4c1f4b --- /dev/null +++ b/tests/zfs-tests/cmd/btree_test/.gitignore @@ -0,0 +1 @@ +/btree_test diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c index 977b9e2f3ddd..ef388eaef473 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c @@ -816,8 +816,8 @@ zfs_ioc_input_tests(const char *pool) enum zfs_ioc_ref { ZFS_IOC_BASE = ('Z' << 8), - LINUX_IOC_BASE = ('Z' << 8) + 0x80, - FREEBSD_IOC_BASE = ('Z' << 8) + 0xC0, + LINUX_IOC_BASE = ZFS_IOC_BASE + 0x80, + FREEBSD_IOC_BASE = ZFS_IOC_BASE + 0xC0, }; /* @@ -827,91 +827,103 @@ enum zfs_ioc_ref { boolean_t validate_ioc_values(void) { - return ( - ZFS_IOC_BASE + 0 == ZFS_IOC_POOL_CREATE && - ZFS_IOC_BASE + 1 == ZFS_IOC_POOL_DESTROY && - ZFS_IOC_BASE + 2 == ZFS_IOC_POOL_IMPORT && - ZFS_IOC_BASE + 3 == ZFS_IOC_POOL_EXPORT && - ZFS_IOC_BASE + 4 == ZFS_IOC_POOL_CONFIGS && - ZFS_IOC_BASE + 5 == ZFS_IOC_POOL_STATS && - ZFS_IOC_BASE + 6 == ZFS_IOC_POOL_TRYIMPORT && - ZFS_IOC_BASE + 7 == ZFS_IOC_POOL_SCAN && - ZFS_IOC_BASE + 8 == ZFS_IOC_POOL_FREEZE && - ZFS_IOC_BASE + 9 == ZFS_IOC_POOL_UPGRADE && - ZFS_IOC_BASE + 10 == ZFS_IOC_POOL_GET_HISTORY && - ZFS_IOC_BASE + 11 == ZFS_IOC_VDEV_ADD && - ZFS_IOC_BASE + 12 == ZFS_IOC_VDEV_REMOVE && - ZFS_IOC_BASE + 13 == ZFS_IOC_VDEV_SET_STATE && - ZFS_IOC_BASE + 14 == ZFS_IOC_VDEV_ATTACH && - ZFS_IOC_BASE + 15 == ZFS_IOC_VDEV_DETACH && - ZFS_IOC_BASE + 16 == ZFS_IOC_VDEV_SETPATH && - ZFS_IOC_BASE + 17 == ZFS_IOC_VDEV_SETFRU && - ZFS_IOC_BASE + 18 == ZFS_IOC_OBJSET_STATS && - ZFS_IOC_BASE + 19 == ZFS_IOC_OBJSET_ZPLPROPS && - ZFS_IOC_BASE + 20 == ZFS_IOC_DATASET_LIST_NEXT && - ZFS_IOC_BASE + 21 == ZFS_IOC_SNAPSHOT_LIST_NEXT && - ZFS_IOC_BASE + 22 == ZFS_IOC_SET_PROP && - ZFS_IOC_BASE + 23 == ZFS_IOC_CREATE && - ZFS_IOC_BASE + 24 == ZFS_IOC_DESTROY && - ZFS_IOC_BASE + 25 == ZFS_IOC_ROLLBACK && - ZFS_IOC_BASE + 26 == ZFS_IOC_RENAME && - ZFS_IOC_BASE + 27 == ZFS_IOC_RECV && - ZFS_IOC_BASE + 28 == ZFS_IOC_SEND && - ZFS_IOC_BASE + 29 == ZFS_IOC_INJECT_FAULT && - ZFS_IOC_BASE + 30 == ZFS_IOC_CLEAR_FAULT && - ZFS_IOC_BASE + 31 == ZFS_IOC_INJECT_LIST_NEXT && - ZFS_IOC_BASE + 32 == ZFS_IOC_ERROR_LOG && - ZFS_IOC_BASE + 33 == ZFS_IOC_CLEAR && - ZFS_IOC_BASE + 34 == ZFS_IOC_PROMOTE && - ZFS_IOC_BASE + 35 == ZFS_IOC_SNAPSHOT && - ZFS_IOC_BASE + 36 == ZFS_IOC_DSOBJ_TO_DSNAME && - ZFS_IOC_BASE + 37 == ZFS_IOC_OBJ_TO_PATH && - ZFS_IOC_BASE + 38 == ZFS_IOC_POOL_SET_PROPS && - ZFS_IOC_BASE + 39 == ZFS_IOC_POOL_GET_PROPS && - ZFS_IOC_BASE + 40 == ZFS_IOC_SET_FSACL && - ZFS_IOC_BASE + 41 == ZFS_IOC_GET_FSACL && - ZFS_IOC_BASE + 42 == ZFS_IOC_SHARE && - ZFS_IOC_BASE + 43 == 
ZFS_IOC_INHERIT_PROP && - ZFS_IOC_BASE + 44 == ZFS_IOC_SMB_ACL && - ZFS_IOC_BASE + 45 == ZFS_IOC_USERSPACE_ONE && - ZFS_IOC_BASE + 46 == ZFS_IOC_USERSPACE_MANY && - ZFS_IOC_BASE + 47 == ZFS_IOC_USERSPACE_UPGRADE && - ZFS_IOC_BASE + 48 == ZFS_IOC_HOLD && - ZFS_IOC_BASE + 49 == ZFS_IOC_RELEASE && - ZFS_IOC_BASE + 50 == ZFS_IOC_GET_HOLDS && - ZFS_IOC_BASE + 51 == ZFS_IOC_OBJSET_RECVD_PROPS && - ZFS_IOC_BASE + 52 == ZFS_IOC_VDEV_SPLIT && - ZFS_IOC_BASE + 53 == ZFS_IOC_NEXT_OBJ && - ZFS_IOC_BASE + 54 == ZFS_IOC_DIFF && - ZFS_IOC_BASE + 55 == ZFS_IOC_TMP_SNAPSHOT && - ZFS_IOC_BASE + 56 == ZFS_IOC_OBJ_TO_STATS && - ZFS_IOC_BASE + 57 == ZFS_IOC_SPACE_WRITTEN && - ZFS_IOC_BASE + 58 == ZFS_IOC_SPACE_SNAPS && - ZFS_IOC_BASE + 59 == ZFS_IOC_DESTROY_SNAPS && - ZFS_IOC_BASE + 60 == ZFS_IOC_POOL_REGUID && - ZFS_IOC_BASE + 61 == ZFS_IOC_POOL_REOPEN && - ZFS_IOC_BASE + 62 == ZFS_IOC_SEND_PROGRESS && - ZFS_IOC_BASE + 63 == ZFS_IOC_LOG_HISTORY && - ZFS_IOC_BASE + 64 == ZFS_IOC_SEND_NEW && - ZFS_IOC_BASE + 65 == ZFS_IOC_SEND_SPACE && - ZFS_IOC_BASE + 66 == ZFS_IOC_CLONE && - ZFS_IOC_BASE + 67 == ZFS_IOC_BOOKMARK && - ZFS_IOC_BASE + 68 == ZFS_IOC_GET_BOOKMARKS && - ZFS_IOC_BASE + 69 == ZFS_IOC_DESTROY_BOOKMARKS && - ZFS_IOC_BASE + 70 == ZFS_IOC_RECV_NEW && - ZFS_IOC_BASE + 71 == ZFS_IOC_POOL_SYNC && - ZFS_IOC_BASE + 72 == ZFS_IOC_CHANNEL_PROGRAM && - ZFS_IOC_BASE + 73 == ZFS_IOC_LOAD_KEY && - ZFS_IOC_BASE + 74 == ZFS_IOC_UNLOAD_KEY && - ZFS_IOC_BASE + 75 == ZFS_IOC_CHANGE_KEY && - ZFS_IOC_BASE + 76 == ZFS_IOC_REMAP && - ZFS_IOC_BASE + 77 == ZFS_IOC_POOL_CHECKPOINT && - ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT && - ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE && - ZFS_IOC_BASE + 80 == ZFS_IOC_POOL_TRIM && - LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT && - LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR && - LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK); + boolean_t result = B_TRUE; + +#define CHECK(expr) do { \ + if (!(expr)) { \ + result = B_FALSE; \ + fprintf(stderr, "(%s) === FALSE\n", #expr); \ + } \ +} while (0) + + CHECK(ZFS_IOC_BASE + 0 == ZFS_IOC_POOL_CREATE); + CHECK(ZFS_IOC_BASE + 1 == ZFS_IOC_POOL_DESTROY); + CHECK(ZFS_IOC_BASE + 2 == ZFS_IOC_POOL_IMPORT); + CHECK(ZFS_IOC_BASE + 3 == ZFS_IOC_POOL_EXPORT); + CHECK(ZFS_IOC_BASE + 4 == ZFS_IOC_POOL_CONFIGS); + CHECK(ZFS_IOC_BASE + 5 == ZFS_IOC_POOL_STATS); + CHECK(ZFS_IOC_BASE + 6 == ZFS_IOC_POOL_TRYIMPORT); + CHECK(ZFS_IOC_BASE + 7 == ZFS_IOC_POOL_SCAN); + CHECK(ZFS_IOC_BASE + 8 == ZFS_IOC_POOL_FREEZE); + CHECK(ZFS_IOC_BASE + 9 == ZFS_IOC_POOL_UPGRADE); + CHECK(ZFS_IOC_BASE + 10 == ZFS_IOC_POOL_GET_HISTORY); + CHECK(ZFS_IOC_BASE + 11 == ZFS_IOC_VDEV_ADD); + CHECK(ZFS_IOC_BASE + 12 == ZFS_IOC_VDEV_REMOVE); + CHECK(ZFS_IOC_BASE + 13 == ZFS_IOC_VDEV_SET_STATE); + CHECK(ZFS_IOC_BASE + 14 == ZFS_IOC_VDEV_ATTACH); + CHECK(ZFS_IOC_BASE + 15 == ZFS_IOC_VDEV_DETACH); + CHECK(ZFS_IOC_BASE + 16 == ZFS_IOC_VDEV_SETPATH); + CHECK(ZFS_IOC_BASE + 17 == ZFS_IOC_VDEV_SETFRU); + CHECK(ZFS_IOC_BASE + 18 == ZFS_IOC_OBJSET_STATS); + CHECK(ZFS_IOC_BASE + 19 == ZFS_IOC_OBJSET_ZPLPROPS); + CHECK(ZFS_IOC_BASE + 20 == ZFS_IOC_DATASET_LIST_NEXT); + CHECK(ZFS_IOC_BASE + 21 == ZFS_IOC_SNAPSHOT_LIST_NEXT); + CHECK(ZFS_IOC_BASE + 22 == ZFS_IOC_SET_PROP); + CHECK(ZFS_IOC_BASE + 23 == ZFS_IOC_CREATE); + CHECK(ZFS_IOC_BASE + 24 == ZFS_IOC_DESTROY); + CHECK(ZFS_IOC_BASE + 25 == ZFS_IOC_ROLLBACK); + CHECK(ZFS_IOC_BASE + 26 == ZFS_IOC_RENAME); + CHECK(ZFS_IOC_BASE + 27 == ZFS_IOC_RECV); + CHECK(ZFS_IOC_BASE + 28 == ZFS_IOC_SEND); + CHECK(ZFS_IOC_BASE + 29 == ZFS_IOC_INJECT_FAULT); + 
CHECK(ZFS_IOC_BASE + 30 == ZFS_IOC_CLEAR_FAULT); + CHECK(ZFS_IOC_BASE + 31 == ZFS_IOC_INJECT_LIST_NEXT); + CHECK(ZFS_IOC_BASE + 32 == ZFS_IOC_ERROR_LOG); + CHECK(ZFS_IOC_BASE + 33 == ZFS_IOC_CLEAR); + CHECK(ZFS_IOC_BASE + 34 == ZFS_IOC_PROMOTE); + CHECK(ZFS_IOC_BASE + 35 == ZFS_IOC_SNAPSHOT); + CHECK(ZFS_IOC_BASE + 36 == ZFS_IOC_DSOBJ_TO_DSNAME); + CHECK(ZFS_IOC_BASE + 37 == ZFS_IOC_OBJ_TO_PATH); + CHECK(ZFS_IOC_BASE + 38 == ZFS_IOC_POOL_SET_PROPS); + CHECK(ZFS_IOC_BASE + 39 == ZFS_IOC_POOL_GET_PROPS); + CHECK(ZFS_IOC_BASE + 40 == ZFS_IOC_SET_FSACL); + CHECK(ZFS_IOC_BASE + 41 == ZFS_IOC_GET_FSACL); + CHECK(ZFS_IOC_BASE + 42 == ZFS_IOC_SHARE); + CHECK(ZFS_IOC_BASE + 43 == ZFS_IOC_INHERIT_PROP); + CHECK(ZFS_IOC_BASE + 44 == ZFS_IOC_SMB_ACL); + CHECK(ZFS_IOC_BASE + 45 == ZFS_IOC_USERSPACE_ONE); + CHECK(ZFS_IOC_BASE + 46 == ZFS_IOC_USERSPACE_MANY); + CHECK(ZFS_IOC_BASE + 47 == ZFS_IOC_USERSPACE_UPGRADE); + CHECK(ZFS_IOC_BASE + 48 == ZFS_IOC_HOLD); + CHECK(ZFS_IOC_BASE + 49 == ZFS_IOC_RELEASE); + CHECK(ZFS_IOC_BASE + 50 == ZFS_IOC_GET_HOLDS); + CHECK(ZFS_IOC_BASE + 51 == ZFS_IOC_OBJSET_RECVD_PROPS); + CHECK(ZFS_IOC_BASE + 52 == ZFS_IOC_VDEV_SPLIT); + CHECK(ZFS_IOC_BASE + 53 == ZFS_IOC_NEXT_OBJ); + CHECK(ZFS_IOC_BASE + 54 == ZFS_IOC_DIFF); + CHECK(ZFS_IOC_BASE + 55 == ZFS_IOC_TMP_SNAPSHOT); + CHECK(ZFS_IOC_BASE + 56 == ZFS_IOC_OBJ_TO_STATS); + CHECK(ZFS_IOC_BASE + 57 == ZFS_IOC_SPACE_WRITTEN); + CHECK(ZFS_IOC_BASE + 58 == ZFS_IOC_SPACE_SNAPS); + CHECK(ZFS_IOC_BASE + 59 == ZFS_IOC_DESTROY_SNAPS); + CHECK(ZFS_IOC_BASE + 60 == ZFS_IOC_POOL_REGUID); + CHECK(ZFS_IOC_BASE + 61 == ZFS_IOC_POOL_REOPEN); + CHECK(ZFS_IOC_BASE + 62 == ZFS_IOC_SEND_PROGRESS); + CHECK(ZFS_IOC_BASE + 63 == ZFS_IOC_LOG_HISTORY); + CHECK(ZFS_IOC_BASE + 64 == ZFS_IOC_SEND_NEW); + CHECK(ZFS_IOC_BASE + 65 == ZFS_IOC_SEND_SPACE); + CHECK(ZFS_IOC_BASE + 66 == ZFS_IOC_CLONE); + CHECK(ZFS_IOC_BASE + 67 == ZFS_IOC_BOOKMARK); + CHECK(ZFS_IOC_BASE + 68 == ZFS_IOC_GET_BOOKMARKS); + CHECK(ZFS_IOC_BASE + 69 == ZFS_IOC_DESTROY_BOOKMARKS); + CHECK(ZFS_IOC_BASE + 70 == ZFS_IOC_RECV_NEW); + CHECK(ZFS_IOC_BASE + 71 == ZFS_IOC_POOL_SYNC); + CHECK(ZFS_IOC_BASE + 72 == ZFS_IOC_CHANNEL_PROGRAM); + CHECK(ZFS_IOC_BASE + 73 == ZFS_IOC_LOAD_KEY); + CHECK(ZFS_IOC_BASE + 74 == ZFS_IOC_UNLOAD_KEY); + CHECK(ZFS_IOC_BASE + 75 == ZFS_IOC_CHANGE_KEY); + CHECK(ZFS_IOC_BASE + 76 == ZFS_IOC_REMAP); + CHECK(ZFS_IOC_BASE + 77 == ZFS_IOC_POOL_CHECKPOINT); + CHECK(ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT); + CHECK(ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE); + CHECK(ZFS_IOC_BASE + 80 == ZFS_IOC_POOL_TRIM); + CHECK(LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT); + CHECK(LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR); + CHECK(LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK); + +#undef CHECK + + return (result); } int diff --git a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c index b9915d5d31eb..458d6d8e402b 100644 --- a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c +++ b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c @@ -43,7 +43,7 @@ * is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous * txg "n" completed. * 3. context #1 call uiomove to write, however page fault is occurred in - * uiomove, which means it need mm_sem, but mm_sem is hold by + * uiomove, which means it needs mm_sem, but mm_sem is hold by * context #2, so it stuck and can't complete, then txg "n" will not * complete. 
* diff --git a/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c b/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c index 7986851efae2..e262ecefea92 100644 --- a/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c +++ b/tests/zfs-tests/cmd/rm_lnkcnt_zero_file/rm_lnkcnt_zero_file.c @@ -47,7 +47,6 @@ #include #include -static const int TRUE = 1; static char *filebase; static int @@ -65,7 +64,7 @@ mover(void *a) len = strlen(filebase) + 5; - while (TRUE) { + for (;;) { idx = pickidx(); (void) snprintf(buf, len, "%s.%03d", filebase, idx); ret = rename(filebase, buf); @@ -85,7 +84,7 @@ cleaner(void *a) len = strlen(filebase) + 5; - while (TRUE) { + for (;;) { idx = pickidx(); (void) snprintf(buf, len, "%s.%03d", filebase, idx); ret = remove(buf); @@ -102,7 +101,7 @@ writer(void *a) int *fd = (int *)a; int ret; - while (TRUE) { + for (;;) { if (*fd != -1) (void) close (*fd); @@ -143,7 +142,7 @@ main(int argc, char **argv) (void) pthread_create(&tid, NULL, cleaner, NULL); (void) pthread_create(&tid, NULL, writer, (void *) &fd); - while (TRUE) { + for (;;) { int ret; struct stat st; diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 9cac7184f9fc..af3324683b0d 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -12,12 +12,13 @@ # # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright (c) 2012, 2019 by Delphix. All rights reserved. # Copyright 2016 Nexenta Systems, Inc. # Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved. # Copyright (c) 2017 Lawrence Livermore National Security, LLC. # Copyright (c) 2017 Datto Inc. # Copyright (c) 2017 Open-E, Inc. All Rights Reserved. +# Copyright 2019 Richard Elling # # @@ -55,12 +56,29 @@ function scan_scsi_hosts # # Wait for newly created block devices to have their minors created. +# Additional arguments are passed through to udevadm trigger; the expected +# argument is typically a block device pathname. This is useful when +# waiting on a specific device to settle rather than triggering +# all devices and waiting for them all to settle. +# +# The udevadm settle timeout can be 120 or 180 seconds by default for +# some distros. If a long delay is experienced, it could be due to some +# strangeness in a malfunctioning device that isn't related to the devices +# under test. To help debug this condition, a notice is given if settle takes +# too long. +# +# Note: there is no meaningful return code if udevadm fails. Consumers +# should not expect a return code (do not call it as an argument to log_must). # function block_device_wait { if is_linux; then - udevadm trigger + udevadm trigger $* + typeset -i start=$SECONDS udevadm settle + typeset -i elapsed=$((SECONDS - start)) + [[ $elapsed -gt 60 ]] && \ + log_note udevadm settle time too long: $elapsed fi } @@ -113,7 +131,7 @@ function is_loop_device #disk } # -# Check if the given device is a multipath device and if there is a sybolic +# Check if the given device is a multipath device and if there is a symbolic # link to a device mapper and to a disk # Currently no support for dm devices alone without multipath # @@ -447,3 +465,89 @@ function get_pool_devices #testpool #devdir fi echo $out } + +# +# Write to standard out giving the level, device name, offset and length +# of all blocks in an input file. 
The offset and length are in units of +# 512 byte blocks. In the case of mirrored vdevs, only the first +# device is listed, as the levels, blocks and offsets will be the same +# on other devices. Note that this function only works with mirrored +# or non-redundant pools, not raidz. +# +# The output of this function can be used to introduce corruption at +# varying levels of indirection. +# +function list_file_blocks # input_file +{ + typeset input_file=$1 + + [[ -f $input_file ]] || log_fail "Couldn't find $input_file" + + typeset ds="$(zfs list -H -o name $input_file)" + typeset pool="${ds%%/*}" + typeset inum="$(stat -c '%i' $input_file)" + + # + # Establish a mapping between vdev ids as shown in a DVA and the + # pathnames they correspond to in ${VDEV_MAP[]}. + # + eval $(zdb -C $pool | awk ' + BEGIN { + printf("typeset VDEV_MAP\n"); + looking = 0; + } + /^ children/ { + id = $1; + looking = 1; + } + /path: / && looking == 1 { + print id" "$2; + looking = 0; + } + ' | sed -n 's/^children\[\([0-9]\)\]: \(.*\)$/VDEV_MAP[\1]=\2/p') + + # + # The awk below parses the output of zdb, printing out the level + # of each block along with vdev id, offset and length. The last + # two are converted to decimal in the while loop. 4M is added to + # the offset to compensate for the first two labels and boot + # block. Lastly, the offset and length are printed in units of + # 512b blocks for ease of use with dd. + # + log_must zpool sync -f + typeset level path offset length + zdb -ddddd $ds $inum | awk -F: ' + BEGIN { looking = 0 } + /^Indirect blocks:/ { looking = 1} + /^\t\tsegment / { looking = 0} + /L[0-8]/ && looking == 1 { print $0} + ' | sed -n 's/^.*\(L[0-9]\) \([0-9]*\):\([0-9a-f]*\):\([0-9a-f]*\) .*$/\1 \2 \3 \4/p' | \ + while read level path offset length; do + offset=$((16#$offset)) # Conversion from hex + length=$((16#$length)) + offset="$(((offset + 4 * 1024 * 1024) / 512))" + length="$((length / 512))" + echo "$level ${VDEV_MAP[$path]} $offset $length" + done 2>/dev/null +} + +function corrupt_blocks_at_level # input_file corrupt_level +{ + typeset input_file=$1 + typeset corrupt_level="L${2:-0}" + typeset level path offset length + + [[ -f $input_file ]] || log_fail "Couldn't find $input_file" + + + log_must list_file_blocks $input_file | \ + while read level path offset length; do + if [[ $level = $corrupt_level ]]; then + log_must dd if=/dev/urandom of=$path bs=512 \ + count=$length seek=$offset conv=notrunc + fi + done + + # This is necessary for pools made of loop devices. + sync +} diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 127a1477d426..4d98e7c11a67 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -33,6 +33,7 @@ export SYSTEM_FILES='arp diff dirname dmesg + dmidecode du echo egrep @@ -95,6 +96,7 @@ export SYSTEM_FILES='arp ps pwd python + python2 python3 quotaon readlink diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 57d0880cc9bb..cd593b6f2583 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -856,12 +856,13 @@ function zero_partitions # # Size should be specified with units as per # the `format` command requirements eg. 100mb 3gb # -# NOTE: This entire interface is problematic for the Linux parted utilty +# NOTE: This entire interface is problematic for the Linux parted utility # which requires the end of the partition to be specified. 
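Stepping back to the blkdev.shlib additions above: list_file_blocks() maps every block of a file to a vdev pathname plus a 512-byte offset and length, and corrupt_blocks_at_level() overwrites all blocks at one chosen level of indirection, which is exactly the primitive that checksum-error and self-healing tests need. A hedged ksh usage sketch; the pool, dataset, and file names are invented:

    log_must dd if=/dev/urandom of=/testpool/fs/bigfile bs=1M count=64
    log_must zpool sync testpool
    # Clobber the L1 indirect blocks, then let a scrub surface the damage.
    corrupt_blocks_at_level /testpool/fs/bigfile 1
    log_must zpool scrub testpool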
It would be # best to retire this interface and replace it with something more flexible. # At the moment a best effort is made. # -function set_partition # +# arguments: +function set_partition { typeset -i slicenum=$1 typeset start=$2 @@ -872,6 +873,7 @@ function set_partition # /dev/null + parted $disk -s -- print 1 >/dev/null typeset ret_val=$? if [[ $slicenum -eq 0 || $ret_val -ne 0 ]]; then - parted $DEV_DSKDIR/$disk -s -- mklabel gpt + parted $disk -s -- mklabel gpt if [[ $? -ne 0 ]]; then log_note "Failed to create GPT partition table on $disk" return 1 @@ -899,20 +901,21 @@ function set_partition # /dev/null - block_device_wait + blockdev --rereadpt $disk 2>/dev/null + block_device_wait $disk else if [[ -z $slicenum || -z $size || -z $disk ]]; then log_fail "The slice, size or disk name is unspecified." @@ -932,10 +935,10 @@ function set_partition # > $format_file format -e -s -d $disk -f $format_file + typeset ret_val=$? + rm -f $format_file fi - typeset ret_val=$? - rm -f $format_file if [[ $ret_val -ne 0 ]]; then log_note "Unable to format $disk slice $slicenum to $size" return 1 @@ -950,61 +953,26 @@ function set_partition # /dev/null 2>&1 - if (( $? == 1 )); then - lsblk | egrep ${DISK}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 1 )); then - log_note "Partitions for $DISK should be deleted" - else - log_fail "Partition for ${DISK}${SLICE_PREFIX}${j} not deleted" - fi - return 0 + typeset -i part + for disk in $DISKSARRAY; do + for (( part = 1; part < MAX_PARTITIONS; part++ )); do + typeset partition=${disk}${SLICE_PREFIX}${part} + parted $DEV_DSKDIR/$disk -s rm $part > /dev/null 2>&1 + if lsblk | grep -qF ${partition}; then + log_fail "Partition ${partition} not deleted" else - lsblk | egrep ${DISK}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 0 )); then - log_fail "Partition for ${DISK}${SLICE_PREFIX}${j} not deleted" - fi + log_note "Partition ${partition} deleted" fi - ((j = j+1)) done - else - for disk in `echo $DISKSARRAY`; do - while ((j < MAX_PARTITIONS)); do - parted $DEV_DSKDIR/$disk -s rm $j > /dev/null 2>&1 - if (( $? == 1 )); then - lsblk | egrep ${disk}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 1 )); then - log_note "Partitions for $disk should be deleted" - else - log_fail "Partition for ${disk}${SLICE_PREFIX}${j} not deleted" - fi - j=7 - else - lsblk | egrep ${disk}${SLICE_PREFIX}${j} > /dev/null - if (( $? == 0 )); then - log_fail "Partition for ${disk}${SLICE_PREFIX}${j} not deleted" - fi - fi - ((j = j+1)) - done - j=1 - done - fi + done fi - return 0 } # @@ -1091,7 +1059,7 @@ function partition_disk # # dirnum: the maximum number of subdirectories to use, -1 no limit # filenum: the maximum number of files per subdirectory # bytes: number of bytes to write -# num_writes: numer of types to write out bytes +# num_writes: number of types to write out bytes # data: the data that will be written # # E.g. 
@@ -2234,10 +2202,11 @@ function cleanup_devices #vdevs { typeset pool="foopool$$" - if poolexists $pool ; then - destroy_pool $pool - fi + for vdev in $@; do + zero_partitions $vdev + done + poolexists $pool && destroy_pool $pool create_pool $pool $@ destroy_pool $pool @@ -2387,7 +2356,7 @@ function del_user # fi if id $user > /dev/null 2>&1; then - log_must_retry "currently used" 5 userdel $user + log_must_retry "currently used" 6 userdel $user fi [[ -d $basedir/$user ]] && rm -fr $basedir/$user @@ -2878,7 +2847,7 @@ function labelvtoc # # check if the system was installed as zfsroot or not -# return: 0 ture, otherwise false +# return: 0 if zfsroot, non-zero if not # function is_zfsroot { @@ -3491,13 +3460,13 @@ function set_tunable_impl Linux) typeset zfs_tunables="/sys/module/$module/parameters" [[ -w "$zfs_tunables/$tunable" ]] || return 1 - echo -n "$value" > "$zfs_tunables/$tunable" - return "$?" + cat >"$zfs_tunables/$tunable" <<<"$value" + return $? ;; SunOS) [[ "$module" -eq "zfs" ]] || return 1 echo "${tunable}/${mdb_cmd}0t${value}" | mdb -kw - return "$?" + return $? ;; esac } @@ -3524,7 +3493,7 @@ function get_tunable_impl typeset zfs_tunables="/sys/module/$module/parameters" [[ -f "$zfs_tunables/$tunable" ]] || return 1 cat $zfs_tunables/$tunable - return "$?" + return $? ;; SunOS) [[ "$module" -eq "zfs" ]] || return 1 @@ -3594,3 +3563,25 @@ function mdb_ctf_set_int return 0 } + +# +# Compute MD5 digest for given file or stdin if no file given. +# Note: file path must not contain spaces +# +function md5digest +{ + typeset file=$1 + + md5sum -b $file | awk '{ print $1 }' +} + +# +# Compute SHA256 digest for given file or stdin if no file given. +# Note: file path must not contain spaces +# +function sha256digest +{ + typeset file=$1 + + sha256sum -b $file | awk '{ print $1 }' +} diff --git a/tests/zfs-tests/include/zpool_script.shlib b/tests/zfs-tests/include/zpool_script.shlib old mode 100755 new mode 100644 diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index da27673ec946..a1fe06c165f5 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -59,6 +59,7 @@ SUBDIRS = \ rename_dirs \ replacement \ reservation \ + resilver \ rootpool \ rsend \ scrub_mirror \ @@ -66,6 +67,7 @@ SUBDIRS = \ snapshot \ snapused \ sparse \ + suid \ threadsappend \ tmpfile \ trim \ diff --git a/tests/zfs-tests/tests/functional/acl/acl_common.kshlib b/tests/zfs-tests/tests/functional/acl/acl_common.kshlib index a81cd76ba6aa..ba08bcb48bef 100644 --- a/tests/zfs-tests/tests/functional/acl/acl_common.kshlib +++ b/tests/zfs-tests/tests/functional/acl/acl_common.kshlib @@ -34,7 +34,7 @@ # # Get the given file/directory access mode # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_mode # { @@ -49,7 +49,7 @@ function get_mode # # # Get the given file/directory ACL # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_acl # { @@ -64,7 +64,7 @@ function get_acl # # # Get the given file/directory ACL # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_compact_acl # { @@ -243,12 +243,12 @@ function usr_exec # [...] # # Count how many ACEs for the specified file or directory. # -# $1 file or directroy name +# $1 file or directory name # function count_ACE # { if [[ ! -e $1 ]]; then - log_note "Need input file or directroy name." + log_note "Need input file or directory name." 
return 1 fi @@ -399,7 +399,7 @@ function rwx_node #user node acl_spec|access # # Get the given file/directory xattr # -# $1 object -- file or directroy +# $1 object -- file or directory # function get_xattr # { diff --git a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am index 073eac9882e6..7cffb2eac450 100644 --- a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am +++ b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am @@ -1,7 +1,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/alloc_class dist_pkgdata_SCRIPTS = \ - alloc_class.cfg \ - alloc_class.kshlib \ setup.ksh \ cleanup.ksh \ alloc_class_001_pos.ksh \ @@ -17,3 +15,7 @@ dist_pkgdata_SCRIPTS = \ alloc_class_011_neg.ksh \ alloc_class_012_pos.ksh \ alloc_class_013_pos.ksh + +dist_pkgdata_DATA = \ + alloc_class.cfg \ + alloc_class.kshlib diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh index 441df8296718..3237d7cb784f 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_001_pos.ksh @@ -20,7 +20,8 @@ # # DESCRIPTION: -# Creating a pool with a special device succeeds. +# Creating a pool with a special device succeeds, but only if +# "feature@allocation_classes" is enabled. # verify_runnable "global" @@ -31,6 +32,9 @@ log_assert $claim log_onexit cleanup log_must disk_setup +for type in special dedup; do + log_mustnot zpool create -d $TESTPOOL $CLASS_DISK0 $type $CLASS_DISK1 +done log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ $CLASS_DISK0 $CLASS_DISK1 log_must display_status "$TESTPOOL" diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh index dcc6f7607c9b..79ac9364c257 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh @@ -52,7 +52,7 @@ do log_must zpool create $TESTPOOL $type $ZPOOL_DISKS \ special $stype $sdisks - ac_value="$(zpool get all -H -o property,value | \ + ac_value="$(zpool get -H -o property,value all | \ egrep allocation_classes | nawk '{print $2}')" if [ "$ac_value" = "active" ]; then log_note "feature@allocation_classes is active" diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh index 417c68aa739b..337114cdb59e 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh @@ -41,7 +41,7 @@ do else log_must zpool create $TESTPOOL $type $ZPOOL_DISKS fi - ac_value="$(zpool get all -H -o property,value | \ + ac_value="$(zpool get -H -o property,value all | \ egrep allocation_classes | awk '{print $2}')" if [ "$ac_value" = "enabled" ]; then log_note "feature@allocation_classes is enabled" @@ -56,7 +56,7 @@ do log_must zpool add $TESTPOOL special mirror \ $CLASS_DISK0 $CLASS_DISK1 fi - ac_value="$(zpool get all -H -o property,value | \ + ac_value="$(zpool get -H -o property,value all | \ egrep allocation_classes | awk '{print $2}')" if [ "$ac_value" = "active" ]; then log_note "feature@allocation_classes is active" diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh 
b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh index fe1ae366a6d3..d804e5371ebb 100755 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_011_neg.ksh @@ -21,6 +21,7 @@ # # DESCRIPTION: # Setting the special_small_blocks property to invalid values fails. +# Powers of two from 512 to 1M are allowed. # verify_runnable "global" @@ -34,7 +35,7 @@ log_must disk_setup log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \ $CLASS_DISK0 $CLASS_DISK1 -for value in 256 1025 262144 +for value in 256 1025 2097152 do log_mustnot zfs set special_small_blocks=$value $TESTPOOL done diff --git a/tests/zfs-tests/tests/functional/arc/Makefile.am b/tests/zfs-tests/tests/functional/arc/Makefile.am index dc57ebc86275..809d0346f872 100644 --- a/tests/zfs-tests/tests/functional/arc/Makefile.am +++ b/tests/zfs-tests/tests/functional/arc/Makefile.am @@ -2,5 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/arc dist_pkgdata_SCRIPTS = \ cleanup.ksh \ setup.ksh \ + arcstats_runtime_tuning.ksh \ dbufstats_001_pos.ksh \ - dbufstats_002_pos.ksh + dbufstats_002_pos.ksh \ + dbufstats_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh b/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh new file mode 100755 index 000000000000..6d007aecf845 --- /dev/null +++ b/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh @@ -0,0 +1,46 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/perf/perf.shlib + +function cleanup +{ + # Set tunables to their recorded actual size and then to their original + # value: this works for previously unconfigured tunables. 
+ log_must set_tunable64 zfs_arc_min "$MINSIZE" + log_must set_tunable64 zfs_arc_min "$ZFS_ARC_MIN" + log_must set_tunable64 zfs_arc_max "$MAXSIZE" + log_must set_tunable64 zfs_arc_max "$ZFS_ARC_MAX" +} + +log_onexit cleanup + +ZFS_ARC_MAX="$(get_tunable zfs_arc_max)" +ZFS_ARC_MIN="$(get_tunable zfs_arc_min)" +MINSIZE="$(get_min_arc_size)" +MAXSIZE="$(get_max_arc_size)" + +log_assert "ARC tunables should be updated dynamically" + +for size in $((MAXSIZE/4)) $((MAXSIZE/3)) $((MAXSIZE/2)) $MAXSIZE; do + log_must set_tunable64 zfs_arc_max "$size" + log_must test "$(get_max_arc_size)" == "$size" + log_must set_tunable64 zfs_arc_min "$size" + log_must test "$(get_min_arc_size)" == "$size" +done + +log_pass "ARC tunables can be updated dynamically" diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/dbufstat_001_pos.ksh b/tests/zfs-tests/tests/functional/arc/dbufstats_003_pos.ksh similarity index 90% rename from tests/zfs-tests/tests/functional/cli_user/misc/dbufstat_001_pos.ksh rename to tests/zfs-tests/tests/functional/arc/dbufstats_003_pos.ksh index 0e187015f8d6..91cec74881a6 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/dbufstat_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/arc/dbufstats_003_pos.ksh @@ -33,11 +33,11 @@ log_assert "dbufstat generates output and doesn't return an error code" typeset -i i=0 while [[ $i -lt ${#args[*]} ]]; do - log_must eval "sudo dbufstat ${args[i]} > /dev/null" + log_must eval "dbufstat ${args[i]} >/dev/null" ((i = i + 1)) done # A simple test of dbufstat filter functionality -log_must eval "sudo dbufstat -F object=10,dbc=1,pool=$TESTPOOL > /dev/null" +log_must eval "dbufstat -F object=10,dbc=1,pool=$TESTPOOL >/dev/null" log_pass "dbufstat generates output and doesn't return an error code" diff --git a/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh b/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh index e0b81e166279..841b141e16fc 100755 --- a/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cachefile/cachefile_004_pos.ksh @@ -38,9 +38,9 @@ # Verify set, export and destroy when cachefile is set on pool. # # STRATEGY: -# 1. Create two pools with one same cahcefile1. +# 1. Create two pools with one same cachefile1. # 2. Set cachefile of the two pools to another same cachefile2. -# 3. Verify cachefile1 not exist. +# 3. Verify cachefile1 does not exist. # 4. Export the two pools. # 5. Verify cachefile2 not exist. # 6. Import the two pools and set cachefile to cachefile2. diff --git a/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg b/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg index 9e8e456863b4..5d2efbf000b1 100644 --- a/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg +++ b/tests/zfs-tests/tests/functional/casenorm/casenorm.cfg @@ -17,12 +17,16 @@ # Copyright (c) 2016 by Delphix. All rights reserved. 
# -NAME_C_ORIG=$(echo 'F\0303\0257L\0303\0253N\0303\0204m\0303\0253') -NAME_C_UPPER=$(echo 'F\0303\0217L\0303\0213N\0303\0204M\0303\0213') -NAME_C_LOWER=$(echo 'f\0303\0257l\0303\0253n\0303\0244m\0303\0253') -NAME_D_ORIG=$(echo 'Fi\0314\0210Le\0314\0210NA\0314\0210me\0314\0210') -NAME_D_UPPER=$(echo 'FI\0314\0210LE\0314\0210NA\0314\0210ME\0314\0210') -NAME_D_LOWER=$(echo 'fi\0314\0210le\0314\0210na\0314\0210me\0314\0210') +# Ksh on linux may have locale env variables undefined +export LANG="C.UTF-8" +export LC_ALL="C.UTF-8" + +NAME_C_ORIG=$(printf '\u0046\u00ef\u004c\u00eb\u004e\u00c4\u006d\u00eb') +NAME_C_UPPER=$(printf '\u0046\u00cf\u004c\u00cb\u004e\u00c4\u004d\u00cb') +NAME_C_LOWER=$(printf '\u0066\u00ef\u006c\u00eb\u006e\u00e4\u006d\u00eb') +NAME_D_ORIG=$(printf '\u0046\u0069\u0308\u004c\u0065\u0308\u004e\u0041\u0308\u006d\u0065\u0308') +NAME_D_UPPER=$(printf '\u0046\u0049\u0308\u004c\u0045\u0308\u004e\u0041\u0308\u004d\u0045\u0308') +NAME_D_LOWER=$(printf '\u0066\u0069\u0308\u006c\u0065\u0308\u006e\u0061\u0308\u006d\u0065\u0308') NAMES_ORIG="$NAME_C_ORIG $NAME_D_ORIG" NAMES_UPPER="$NAME_C_UPPER $NAME_D_UPPER" NAMES_LOWER="$NAME_C_LOWER $NAME_D_LOWER" diff --git a/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib b/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib index 273522406b6f..5b080165b9f1 100644 --- a/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib +++ b/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib @@ -65,14 +65,22 @@ function lookup_file { typeset name=$1 - zlook -l $TESTDIR $name >/dev/null 2>&1 + if is_linux; then + test -f "${TESTDIR}/${name}" >/dev/null 2>&1 + else + zlook -l $TESTDIR $name >/dev/null 2>&1 + fi } function lookup_file_ci { typeset name=$1 - zlook -il $TESTDIR $name >/dev/null 2>&1 + if is_linux; then + test -f "${TESTDIR}/${name}" >/dev/null 2>&1 + else + zlook -il $TESTDIR $name >/dev/null 2>&1 + fi } function lookup_any diff --git a/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh b/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh index d28431300a30..1ef9d2756fc8 100755 --- a/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh +++ b/tests/zfs-tests/tests/functional/casenorm/insensitive_formd_lookup.ksh @@ -19,7 +19,7 @@ # DESCRIPTION: # For the filesystem with casesensitivity=insensitive, normalization=formD, -# check that lookup succeds using any name form. +# check that lookup succeeds using any name form. 
# # STRATEGY: # For each c/n name form: diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am b/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am index 7bdaf53de2fa..cc86a2db9193 100644 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am @@ -27,7 +27,8 @@ dist_pkgdata_SCRIPTS = \ tst.snapshot_destroy.ksh \ tst.snapshot_neg.ksh \ tst.snapshot_recursive.ksh \ - tst.snapshot_simple.ksh + tst.snapshot_simple.ksh \ + tst.terminate_by_signal.ksh dist_pkgdata_DATA = \ tst.get_index_props.out \ diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh index 910dddc03f7f..2f5d214ebbf0 100755 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh @@ -95,4 +95,4 @@ log_must_program $TESTPOOL - <<-EOF return 0 EOF -log_pass "Listing zfs user properies should work correctly." +log_pass "Listing zfs user properties should work correctly." diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh new file mode 100755 index 000000000000..0a5fb804ac39 --- /dev/null +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# +. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib + +# +# DESCRIPTION: Execute a long-running zfs channel program and attempt to +# cancel it by sending a signal. +# + +verify_runnable "global" + +rootfs=$TESTPOOL/$TESTFS +snapname=snap +limit=50000000 + +function cleanup +{ + datasetexists $rootfs && log_must zfs destroy -R $rootfs +} + +log_onexit cleanup + +# +# Create a working set of 100 file systems +# +for i in {1..100}; do + log_must zfs create "$rootfs/child$i" +done + +# +# Attempt to create 100 snapshots with zfs.sync.snapshot() along with some +# time consuming efforts. We use loops of zfs.check.* (dry run operations) +# to consume instructions before the next zfs.sync.snapshot() occurs. +# +# Without a signal interruption this ZCP would take several minutes and +# generate over 30 million Lua instructions. +# +function chan_prog +{ +zfs program -t $limit $TESTPOOL - $rootfs $snapname <<-EOF + arg = ... + fs = arg["argv"][1] + snap = arg["argv"][2] + for child in zfs.list.children(fs) do + local snapname = child .. "@" .. snap + zfs.check.snapshot(snapname) + zfs.sync.snapshot(snapname) + for i=1,20000,1 do + zfs.check.snapshot(snapname) + zfs.check.destroy(snapname) + zfs.check.destroy(fs) + end + end + return "should not have reached here" +EOF +} + +log_note "Executing a long-running zfs program in the background" +chan_prog & +CHILD=$! 
+ +# +# After waiting, send a kill signal to the channel program process. +# This should stop the ZCP near a million instructions but still have +# created some of the snapshots. Note that since the above zfs program +# command might get wrapped, we also issue a kill to the group. +# +sleep 10 +log_pos pkill -P $CHILD +log_pos kill $CHILD + +# +# Make sure the channel program did not fully complete by enforcing +# that not all of the snapshots were created. +# +snap_count=$(zfs list -t snapshot | grep $TESTPOOL | wc -l) +log_note "$snap_count snapshots created by ZCP" + +if [ "$snap_count" -eq 0 ]; then + log_fail "Channel program failed to run." +elif [ "$snap_count" -gt 90 ]; then + log_fail "Too many snapshots after a cancel ($snap_count)." +else + log_pass "Canceling a long-running channel program works." +fi diff --git a/tests/zfs-tests/tests/functional/checksum/Makefile.am b/tests/zfs-tests/tests/functional/checksum/Makefile.am index f72546b22590..905d991ed75f 100644 --- a/tests/zfs-tests/tests/functional/checksum/Makefile.am +++ b/tests/zfs-tests/tests/functional/checksum/Makefile.am @@ -1,7 +1,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include -LDADD = $(top_srcdir)/lib/libicp/libicp.la +LDADD = $(top_builddir)/lib/libicp/libicp.la AUTOMAKE_OPTIONS = subdir-objects diff --git a/tests/zfs-tests/tests/functional/checksum/edonr_test.c b/tests/zfs-tests/tests/functional/checksum/edonr_test.c index a2a924e5d89a..596ef2b33681 100644 --- a/tests/zfs-tests/tests/functional/checksum/edonr_test.c +++ b/tests/zfs-tests/tests/functional/checksum/edonr_test.c @@ -42,6 +42,8 @@ typedef enum boolean { B_FALSE, B_TRUE } boolean_t; typedef unsigned long long u_longlong_t; +int aok = 0; + /* * Test messages from: * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf diff --git a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh index ccc60a661d0e..27dad072631d 100755 --- a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh @@ -21,7 +21,7 @@ # # -# Copyright (c) 2018 by Delphix. All rights reserved. +# Copyright (c) 2018, 2019 by Delphix. All rights reserved. # . $STF_SUITE/include/libtest.shlib @@ -32,8 +32,8 @@ # Sanity test to make sure checksum algorithms work. # For each checksum, create a file in the pool using that checksum. Verify # that there are no checksum errors. Next, for each checksum, create a single -# file in the pool using that checksum, scramble the underlying vdev, and -# verify that we correctly catch the checksum errors. +# file in the pool using that checksum, corrupt the file, and verify that we +# correctly catch the checksum errors. # # STRATEGY: # Test 1 @@ -46,11 +46,9 @@ # Test 2 # 6. For each checksum: # 7. Create a file using the checksum -# 8. Export the pool -# 9. Scramble the data on one of the underlying VDEVs -# 10. Import the pool -# 11. Scrub the pool -# 12. Verify that there are checksum errors +# 8. Corrupt all level 0 blocks in the file +# 9. Scrub the pool +# 10. 
Verify that there are checksum errors verify_runnable "both" @@ -66,8 +64,6 @@ log_assert "Create and read back files with using different checksum algorithms" log_onexit cleanup WRITESZ=1048576 -SKIPCNT=$(((4194304 / $WRITESZ) * 2)) -WRITECNT=$((($MINVDEVSIZE / $WRITESZ) - $SKIPCNT)) # Get a list of vdevs in our pool set -A array $(get_disklist_fullpath) @@ -96,7 +92,7 @@ log_must [ $cksum -eq 0 ] rm -fr $TESTDIR/* -log_assert "Test scrambling the disk and seeing checksum errors" +log_assert "Test corrupting the files and seeing checksum errors" typeset -i j=1 while [[ $j -lt ${#CHECKSUM_TYPES[*]} ]]; do type=${CHECKSUM_TYPES[$j]} @@ -104,14 +100,9 @@ while [[ $j -lt ${#CHECKSUM_TYPES[*]} ]]; do log_must file_write -o overwrite -f $TESTDIR/test_$type \ -b $WRITESZ -c 5 -d R - log_must zpool export $TESTPOOL + # Corrupt the level 0 blocks of this file + corrupt_blocks_at_level $TESTDIR/test_$type - # Scramble the data on the first vdev in our pool. Skip the first - # and last 16MB of data, then scramble the rest after that. - log_must dd if=/dev/zero of=$firstvdev bs=$WRITESZ skip=$SKIPCNT \ - count=$WRITECNT - - log_must zpool import $TESTPOOL log_must zpool scrub $TESTPOOL log_must wait_scrubbed $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am index d37bcf607f46..9f143078f18f 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am @@ -5,4 +5,6 @@ dist_pkgdata_SCRIPTS = \ zdb_003_pos.ksh \ zdb_004_pos.ksh \ zdb_005_pos.ksh \ - zdb_006_pos.ksh + zdb_006_pos.ksh \ + zdb_checksum.ksh \ + zdb_decompress.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh index a5f827b5642f..e69779bd4b4c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh @@ -59,7 +59,7 @@ set -A args "create" "add" "destroy" "import fakepool" \ "-a" "-f" "-g" "-h" "-j" "-m" "-n" "-o" "-p" \ "-p /tmp" "-r" "-t" "-w" "-x" "-y" "-z" \ "-D" "-E" "-G" "-H" "-I" "-J" "-K" "-M" \ - "-N" "-Q" "-R" "-S" "-T" "-W" "-Y" "-Z" + "-N" "-Q" "-R" "-S" "-T" "-W" "-Z" log_assert "Execute zdb using invalid parameters." diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh new file mode 100755 index 000000000000..9bc3603d46a0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# zdb -c will display the same checksum as -ddddddbbbbbb +# +# Strategy: +# 1. Create a pool +# 2. Write some data to a file +# 3. Run zdb -ddddddbbbbbb against the file +# 4. Record the checksum and DVA of L0 block 0 +# 5. 
Run zdb -R with :c flag and match the checksum + + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Verify zdb -R generates the correct checksum." +log_onexit cleanup +init_data=$TESTDIR/file1 +write_count=8 +blksize=131072 +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS +file_write -o create -w -f $init_data -b $blksize -c $write_count + +# get object number of file +listing=$(ls -i $init_data) +set -A array $listing +obj=${array[0]} +log_note "file $init_data has object number $obj" + +output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \ + |grep -m 1 "L0 DVA" |head -n1) +dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output") +log_note "block 0 of $init_data has a DVA of $dva" +cksum_expected=$(grep -oP '(?<=cksum=)[ A-Za-z0-9:]*' <<< "$output") +log_note "expecting cksum $cksum_expected" +output=$(zdb -R $TESTPOOL $dva:c 2> /dev/null) +result=$(grep $cksum_expected <<< "$output") +(( $? != 0 )) && log_fail "zdb -R failed to print the correct checksum" + +log_pass "zdb -R generates the correct checksum" diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh new file mode 100755 index 000000000000..0e468d7c9872 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# zdb -R pool :d will display the correct data and length +# +# Strategy: +# 1. Create a pool, set compression to lzjb +# 2. Write some identifiable data to a file +# 3. Run zdb -ddddddbbbbbb against the file +# 4. Record the DVA, lsize, and psize of L0 block 0 +# 5. Run zdb -R with :d flag and match the data +# 6. Run zdb -R with :dr flags and match the lsize/psize +# 7. Run zdb -R with :dr flags and match the lsize +# 8. 
Run zdb -R with :dr flags and match the psize +# + + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Verify zdb -R :d flag (decompress) works as expected" +log_onexit cleanup +init_data=$TESTDIR/file1 +write_count=256 +blksize=4096 +pattern="_match__pattern_" +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS +log_must zfs set recordsize=$blksize $TESTPOOL/$TESTFS +log_must zfs set compression=lzjb $TESTPOOL/$TESTFS + +# 16 chars 256 times = 4k = block size +typeset four_k="" +for i in {1..$write_count} +do + four_k=$four_k$pattern +done + +# write the 4k block 256 times +for i in {1..$write_count} +do + echo $four_k >> $init_data +done + +sync_pool $TESTPOOL true + +# get object number of file +listing=$(ls -i $init_data) +set -A array $listing +obj=${array[0]} +log_note "file $init_data has object number $obj" + +output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \ + |grep -m 1 "L0 DVA" |head -n1) +dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output") +log_note "block 0 of $init_data has a DVA of $dva" + +# use the length reported by zdb -ddddddbbbbbb +size_str=$(grep -oP 'size=\K.*?(?= )' <<< "$output") +log_note "block size $size_str" + +vdev=$(echo "$dva" |awk '{split($0,array,":")} END{print array[1]}') +offset=$(echo "$dva" |awk '{split($0,array,":")} END{print array[2]}') +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:d 2> /dev/null) +echo $output |grep $pattern > /dev/null +(( $? != 0 )) && log_fail "zdb -R :d failed to decompress the data properly" + +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null) +echo $output |grep $four_k > /dev/null +(( $? != 0 )) && log_fail "zdb -R :dr failed to decompress the data properly" + +output=$(zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null) +result=${#output} +(( $result != $blksize)) && log_fail \ +"zdb -R failed to decompress the data to the length (${#output} != $size_str)" + +# decompress using lsize +lsize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[1]}') +psize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[2]}') +output=$(zdb -R $TESTPOOL $vdev:$offset:$lsize:dr 2> /dev/null) +result=${#output} +(( $result != $blksize)) && log_fail \ +"zdb -R failed to decompress the data (length ${#output} != $blksize)" + +# Specifying psize will decompress successfully , but not always to full +# lsize since zdb has to guess lsize incrementally. +output=$(zdb -R $TESTPOOL $vdev:$offset:$psize:dr 2> /dev/null) +result=${#output} +# convert psize to decimal +psize_orig=$psize +psize=${psize%?} +psize=$((16#$psize)) +(( $result < $psize)) && log_fail \ +"zdb -R failed to decompress the data with psize $psize_orig\ + (length ${#output} < $psize)" + +log_pass "zdb -R :d flag (decompress) works as expected" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh index b21b6c657dfe..92382aa11d63 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh @@ -62,7 +62,7 @@ log_assert "With ZFS_ABORT set, all zfs commands can abort and generate a " \ "core file." 
log_onexit cleanup -#preparation work for testing +# Preparation work for testing corepath=$TESTDIR/core if [[ -d $corepath ]]; then rm -rf $corepath fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am index 7c67e7239b83..72d6e4700e17 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am @@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ zfs_change-key.ksh \ zfs_change-key_child.ksh \ + zfs_change-key_clones.ksh \ zfs_change-key_inherit.ksh \ zfs_change-key_format.ksh \ zfs_change-key_load.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh index dda7c1df433c..a886ab8a7793 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh @@ -28,13 +28,15 @@ # STRATEGY: # 1. Create an encrypted dataset # 2. Create an encrypted child dataset -# 3. Attempt to change the key without any flags -# 4. Attempt to change the key specifying keylocation -# 5. Attempt to change the key specifying keyformat -# 6. Verify the new encryption root can unload and load its key -# 7. Recreate the child dataset -# 8. Attempt to change the key specifying both the keylocation and keyformat -# 9. Verify the new encryption root can unload and load its key +# 3. Create an unencrypted child dataset +# 4. Attempt to change the key without any flags +# 5. Attempt to change the key specifying keylocation +# 6. Attempt to change the key specifying keyformat +# 7. Verify the new encryption root can unload and load its key +# 8. Recreate the child dataset +# 9. Attempt to change the key specifying both the keylocation and keyformat +# 10. Verify the new encryption root can unload and load its key +# 11. Verify the unencrypted child is still accessible normally # verify_runnable "both" @@ -53,6 +55,7 @@ log_assert "'zfs change-key' should promote an encrypted child to an" \ log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" log_must zfs create $TESTPOOL/$TESTFS1/child +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/child2 log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \ "$TESTPOOL/$TESTFS1/child" @@ -82,5 +85,7 @@ log_must key_unavailable $TESTPOOL/$TESTFS1/child log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" log_must key_available $TESTPOOL/$TESTFS1/child +log_must zfs unmount $TESTPOOL/$TESTFS1/child2 +log_must zfs mount $TESTPOOL/$TESTFS1/child2 log_pass "'zfs change-key' promotes an encrypted child to an encryption root" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh new file mode 100755 index 000000000000..497fb99c8102 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh @@ -0,0 +1,80 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL.
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should correctly update encryption roots with clones. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create an encryption root child of the first dataset +# 3. Clone the child encryption root twice +# 4. Add inheriting children to the encryption root and each of the clones +# 5. Verify the encryption roots +# 6. Have the child encryption root inherit from its parent +# 7. Verify the encryption root for all datasets is now the parent dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -Rf $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs change-key' should correctly update encryption " \ + "roots with clones" + +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "echo $PASSPHRASE2 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1/child" +log_must zfs snapshot $TESTPOOL/$TESTFS1/child@1 +log_must zfs clone $TESTPOOL/$TESTFS1/child@1 $TESTPOOL/$TESTFS1/clone1 +log_must zfs clone $TESTPOOL/$TESTFS1/child@1 $TESTPOOL/$TESTFS1/clone2 +log_must zfs create $TESTPOOL/$TESTFS1/child/A +log_must zfs create $TESTPOOL/$TESTFS1/clone1/B +log_must zfs create $TESTPOOL/$TESTFS1/clone2/C + +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1 $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2 $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child/A $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1/B $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2/C $TESTPOOL/$TESTFS1/child + +log_must zfs change-key -i $TESTPOOL/$TESTFS1/child + +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child/A $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone1/B $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/clone2/C $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key' correctly updates encryption roots with clones" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh index 40cabf649d11..dcf80095db28 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh @@ -143,33 +143,29 @@ datasets="$TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1/$TESTFS2 typeset -a d_clones typeset -a deferred_snaps typeset -i i -i=1 log_must setup_ds log_note 
"Verify zfs clone property for multiple clones" names=$(zfs list -rt all -o name $TESTPOOL) log_must verify_clones 3 0 -log_note "verfify clone property for clone deletion" +log_note "verify clone property for clone deletion" i=1 for ds in $datasets; do log_must zfs destroy $ds/$TESTCLONE.$i ((i=i+1)) done names=$(zfs list -rt all -o name $TESTPOOL) -i=1 log_must verify_clones 2 1 log_must local_cleanup log_must setup_ds log_note "verify zfs deferred destroy on clones property" -i=1 names=$(zfs list -rt all -o name $TESTPOOL) for ds in $datasets; do log_must zfs destroy -d $ds@snap deferred_snaps=( "${deferred_snaps[@]}" "$ds@snap" ) - ((i=i+1)) done log_must verify_clones 3 0 @@ -206,17 +202,14 @@ for ds in $datasets; do done names=$(zfs list -rt all -o name,clones $TESTPOOL) log_must verify_clones 3 1 $TESTCLONE -i=1 for ds in $datasets; do log_must zfs promote $ds - ((i=i+1)) done log_must local_cleanup log_note "verify clone list truncated correctly" -typeset -i j=200 -i=1 fs=$TESTPOOL/$TESTFS1 +xs=""; for i in {1..200}; do xs+="x"; done if is_linux; then ZFS_MAXPROPLEN=4096 else @@ -224,10 +217,8 @@ else fi log_must zfs create $fs log_must zfs snapshot $fs@snap -while((i <= $(( ZFS_MAXPROPLEN/200+1 )))); do - log_must zfs clone $fs@snap $fs/$TESTCLONE$(python -c 'print "x" * 200').$i - ((i=i+1)) - ((j=j+200)) +for (( i = 1; i <= (ZFS_MAXPROPLEN / 200 + 1); i++ )); do + log_must zfs clone ${fs}@snap ${fs}/${TESTCLONE}${xs}.${i} done clone_list=$(zfs list -o clones $fs@snap) char_count=$(echo "$clone_list" | tail -1 | wc | awk '{print $3}') diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib index a86b2f78f866..b0ced58c9f72 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib @@ -49,19 +49,6 @@ function cmp_prop fi } -# -# Get the value of property used via zfs list -# $1, the dataset name -# -function get_used_prop -{ - typeset ds=$1 - typeset used - - used=`zfs list -H -p -o used $ds` - echo $used -} - # # Check the used space is charged correctly # $1, the number of used space @@ -85,64 +72,72 @@ function check_used # # test ncopies on volume -# $1 test type zfs|ufs, default zfs +# $1 test type zfs|ufs|ext2 # $2 copies -# $3 mntp for ufs test +# $3 mntp for ufs|ext2 test function do_vol_test { typeset type=$1 - typeset copy=$2 + typeset copies=$2 typeset mntp=$3 vol=$TESTPOOL/$TESTVOL1 vol_b_path=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL1 vol_r_path=$ZVOL_RDEVDIR/$TESTPOOL/$TESTVOL1 - log_must zfs create -V $VOLSIZE -o copies=$copy $vol + log_must zfs create -V $VOLSIZE -o copies=$copies $vol log_must zfs set refreservation=none $vol block_device_wait - if [[ $type == "ufs" ]]; then - log_must echo y | newfs $vol_r_path >/dev/null 2>&1 - log_must mount -F ufs -o rw $vol_b_path $mntp - elif [[ $type == "ext2" ]]; then - log_must echo y | newfs $vol_r_path >/dev/null 2>&1 + case "$type" in + "ext2") + log_must eval "echo y | newfs $vol_r_path >/dev/null 2>&1" log_must mount -o rw $vol_b_path $mntp - else + ;; + "ufs") + if is_linux; then + log_unsupported "ufs test not implemented for linux" + fi + log_must eval "newfs $vol_r_path >/dev/null 2>&1" + log_must mount $vol_b_path $mntp + ;; + "zfs") log_must zpool create $TESTPOOL1 $vol_b_path log_must zfs create $TESTPOOL1/$TESTFS1 - fi - - ((nfilesize = copy * ${FILESIZE%m})) - pre_used=$(get_used_prop $vol) + ;; + *) + log_unsupported 
"$type test not implemented" + ;; + esac + + ((nfilesize = copies * ${FILESIZE%m})) + pre_used=$(get_prop used $vol) ((target_size = pre_used + nfilesize)) - if [[ $type == "ufs" ]]; then - log_must mkfile $FILESIZE $mntp/$FILE - elif [[ $type == "ext2" ]]; then - log_must mkfile $FILESIZE $mntp/$FILE - else + if [[ $type == "zfs" ]]; then log_must mkfile $FILESIZE /$TESTPOOL1/$TESTFS1/$FILE + else + log_must mkfile $FILESIZE $mntp/$FILE fi - post_used=$(get_used_prop $vol) - while ((post_used < target_size)) ; do + post_used=$(get_prop used $vol) + ((retries = 0)) + while ((post_used < target_size && retries++ < 42)); do sleep 1 - post_used=$(get_used_prop $vol) + post_used=$(get_prop used $vol) done ((used = post_used - pre_used)) if ((used < nfilesize)); then log_fail "The space is not charged correctly while setting" \ - "copies as $copy" + "copies as $copies ($used < $nfilesize)" \ + "pre=${pre_used} post=${post_used}" fi - if [[ $type == "ufs" ]]; then - umount $mntp - elif [[ $type == "ext2" ]]; then - umount $mntp - else + if [[ $type == "zfs" ]]; then log_must zpool destroy $TESTPOOL1 + else + log_must umount $mntp fi log_must zfs destroy $vol diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh index a5a9729dc17f..11265cd5afe6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh @@ -92,7 +92,7 @@ for val in 1 2 3; do check_used $used $val done -log_note "Verify df(1M) can corectly display the space charged." +log_note "Verify df(1M) can correctly display the space charged." for val in 1 2 3; do used=`df -F zfs -k /$TESTPOOL/fs_$val/$FILE | grep $TESTPOOL/fs_$val \ | awk '{print $3}'` diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh index 5946bf59679c..4a3ef76de763 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh @@ -70,8 +70,8 @@ if [[ ! -d $mntp ]]; then mkdir -p $mntp fi -for val in 1 2 3; do - do_vol_test $NEWFS_DEFAULT_FS $val $mntp +for copies in 1 2 3; do + do_vol_test $NEWFS_DEFAULT_FS $copies $mntp done log_pass "The volume space used by multiple copies is charged correctly as expected. " diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh index b8190626c7b3..63f5e595ea38 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh @@ -90,7 +90,9 @@ set -A args "$TESTPOOL/" "$TESTPOOL//blah" "$TESTPOOL/@blah" \ "$TESTPOOL/blah*blah" "$TESTPOOL/blah blah" \ "-s $TESTPOOL/$TESTFS1" "-b 1092 $TESTPOOL/$TESTFS1" \ "-b 64k $TESTPOOL/$TESTFS1" "-s -b 32k $TESTPOOL/$TESTFS1" \ - "$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT" + "$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT" \ + "$TESTPOOL/." "$TESTPOOL/.." "$TESTPOOL/../blah" "$TESTPOOL/./blah" \ + "$TESTPOOL/blah/./blah" "$TESTPOOL/blah/../blah" log_assert "Verify 'zfs create ' fails with bad argument." 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh index 0144b050d7d7..982a4ea16b5e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh @@ -33,7 +33,7 @@ # # DESCRIPTION: -# 'zfs create -p' should work as expecteed +# 'zfs create -p' should work as expected # # STRATEGY: # 1. To create $newdataset with -p option, first make sure the upper level diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh index 9d5ecab0dfee..7e5072f0d5fd 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh @@ -51,10 +51,10 @@ # yes unspec 0 1 no no keyformat specified # yes unspec 1 0 yes new encryption root, crypt inherited # yes unspec 1 1 yes new encryption root, crypt inherited -# yes off 0 0 no unencrypted child of encrypted parent -# yes off 0 1 no unencrypted child of encrypted parent -# yes off 1 0 no unencrypted child of encrypted parent -# yes off 1 1 no unencrypted child of encrypted parent +# yes off 0 0 yes unencrypted child of encrypted parent +# yes off 0 1 no keylocation given, but crypt off +# yes off 1 0 no keyformat given, but crypt off +# yes off 1 1 no keyformat given, but crypt off # yes on 0 0 yes inherited encryption, local crypt # yes on 0 1 no no keyformat specified for new key # yes on 1 0 yes new encryption root @@ -113,7 +113,9 @@ log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4" -log_mustnot zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS2/c5)" == "off" + log_mustnot zfs create -o encryption=off -o keylocation=prompt \ $TESTPOOL/$TESTFS2/c5 log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ @@ -122,13 +124,13 @@ log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ -o keylocation=prompt $TESTPOOL/$TESTFS2/c5 log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "$TESTPOOL/$TESTFS2/c5" + "$TESTPOOL/$TESTFS2/c6" log_mustnot zfs create -o encryption=on -o keylocation=prompt \ - $TESTPOOL/$TESTFS2/c6 + $TESTPOOL/$TESTFS2/c7 log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6" + "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c7" log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7" + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c8" log_pass "ZFS creates datasets only if they have a valid combination of" \ "encryption properties set." 
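The zfs_create_encrypted hunk above flips one row of the truth table: encryption=off with no key options is now accepted under an encrypted parent. A hedged sketch of checking that row by hand, assuming a hypothetical pool named tank:

# Creating an explicitly unencrypted child of an encrypted parent now succeeds.
echo "passphrase1" | zfs create -o encryption=on -o keyformat=passphrase \
	-o keylocation=prompt tank/enc
zfs create -o encryption=off tank/enc/plain
test "$(zfs get -H -o value encryption tank/enc/plain)" = "off" && \
	echo "child is unencrypted, as the updated table predicts"
# Mixing encryption=off with key options is still rejected, per the table.
zfs create -o encryption=off -o keylocation=prompt tank/enc/bad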
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh index 534c33f0a02b..26857d48d48b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh @@ -53,7 +53,7 @@ verify_runnable "both" # run 'zfs destroy $opt '. 3rd, check the system status. # # $1 option of 'zfs destroy' -# $2 dataset will be destroied. +# $2 dataset will be destroyed. # function test_n_check { diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh index 2e4a0c3b2bb5..1c5b2cf1c741 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_005_neg.ksh @@ -145,8 +145,8 @@ if is_global_zone; then check_dataset datasetexists $CTR $VOL check_dataset datasetnonexists $VOLSNAP $VOLCLONE - # Due to recusive destroy being a best-effort operation, - # all of the non-busy datasets bellow should be gone now. + # Due to recursive destroy being a best-effort operation, + # all of the non-busy datasets below should be gone now. check_dataset datasetnonexists $FS $FSSNAP $FSCLONE fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh index df7cfcf5271d..58c4cfb5646d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh @@ -24,7 +24,7 @@ # # DESCRIPTION: # 'zfs destroy -R ' can destroy all the child -# snapshots and preserves all the nested datasetss. +# snapshots and preserves all the nested datasets. # # STRATEGY: # 1. Create nested datasets in the storage pool. @@ -57,7 +57,7 @@ for ds in $datasets; do datasetexists $ds || log_fail "Create $ds dataset fail." done -# create recursive nestedd snapshot +# create recursive nested snapshot log_must zfs snapshot -r $TESTPOOL/$TESTFS1@snap for ds in $datasets; do datasetexists $ds@snap || log_fail "Create $ds@snap snapshot fail." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh index 83cd0a27c300..1e129ddd3bc9 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh @@ -157,7 +157,7 @@ verify_snapshots 1 snaps="1 2 3 4 5" setup_snapshots -log_note "Snapshot destory with hold" +log_note "Snapshot destroy with hold" range="1 2 3 4 5" for i in 1 2 3 4 5; do log_must zfs hold keep $TESTPOOL/$TESTFS1@snap$i diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib index 0a6f5ed9d1a7..9a75daedbb30 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib @@ -56,7 +56,7 @@ function setup_testenv #[dtst] if ! 
datasetexists $FS; then log_must zfs create $FS fi - # Volume test is only availible on globle zone + # Volume test is only available on global zone if ! datasetexists $VOL && is_global_zone; then log_must zfs create -V $VOLSIZE $VOL block_device_wait @@ -127,7 +127,7 @@ function check_dataset shift for dtst in "$@"; do - # Volume and related stuff are unvailable in local zone + # Volume and related stuff are unavailable in local zone if ! is_global_zone; then if [[ $dtst == $VOL || $dtst == $VOLSNAP || \ $dtst == $VOLCLONE ]] @@ -140,7 +140,7 @@ function check_dataset if (( ${#newlist} != 0 )); then # Run each item in $newlist individually so on failure, the - # probelmatic dataset is listed in the logs. + # problematic dataset is listed in the logs. for i in $newlist; do log_must $funname $i done diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh index 471e9ca68e7e..96e6d9b5ae89 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_encrypted.ksh @@ -24,14 +24,15 @@ # 1. Create an encrypted dataset # 2. Create two snapshots of the dataset # 3. Perform 'zfs diff -Ft' and verify no errors occur +# 4. Perform the same test on a dataset with large dnodes # verify_runnable "both" function cleanup { - datasetexists $TESTPOOL/$TESTFS1 && \ - log_must zfs destroy -r $TESTPOOL/$TESTFS1 + destroy_dataset "$TESTPOOL/$TESTFS1" "-r" + destroy_dataset "$TESTPOOL/$TESTFS2" "-r" } log_assert "'zfs diff' should work with encrypted datasets" @@ -50,4 +51,13 @@ log_must zfs snapshot $TESTPOOL/$TESTFS1@snap2 # 3. Perform 'zfs diff' and verify no errors occur log_must zfs diff -Ft $TESTPOOL/$TESTFS1@snap1 $TESTPOOL/$TESTFS1@snap2 +# 4. Perform the same test on a dataset with large dnodes +log_must eval "echo 'password' | zfs create -o dnodesize=4k \ + -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS2" +MNTPOINT="$(get_prop mountpoint $TESTPOOL/$TESTFS2)" +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap1 +log_must touch "$MNTPOINT/file" +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap2 +log_must zfs diff -Ft $TESTPOOL/$TESTFS2@snap1 $TESTPOOL/$TESTFS2@snap2 + log_pass "'zfs diff' works with encrypted datasets" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh index b038e7484ab2..4bd61137c7be 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh @@ -114,7 +114,7 @@ availspace=$(get_prop available $TESTPOOL) typeset -i i=0 # make sure 'availspace' is larger then twice of FILESIZE to create a new pool. 
-# If any, we only totally create 3 pools for multple datasets testing to limit +# If any, we only totally create 3 pools for multiple datasets testing to limit # testing time while (( availspace > DFILESIZE )) && (( i < 3 )) ; do (( i += 1 )) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh index 2d97c5918acd..d4ebbb155efd 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh @@ -92,5 +92,16 @@ log_must eval "zfs get -H -t snapshot -o name creation $DEPTH_FS > $DEPTH_OUTPUT log_must eval "zfs get -H -t snapshot -d 1 -o name creation $DEPTH_FS > $EXPECT_OUTPUT" log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT +# Ensure 'zfs get -t snap' works as a shorthand for 'zfs get -t snapshot' +log_must eval "zfs get -H -t snap -d 1 -o name creation $DEPTH_FS > $DEPTH_OUTPUT" +log_must eval "zfs get -H -t snapshot -d 1 -o name creation $DEPTH_FS > $EXPECT_OUTPUT" +log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT + +# Ensure 'zfs get -t bookmark ' works as though -d 1 was specified +log_must eval "zfs get -H -t bookmark -o name creation $DEPTH_FS > $DEPTH_OUTPUT" +log_must eval "zfs get -H -t bookmark -d 1 -o name creation $DEPTH_FS > $EXPECT_OUTPUT" +log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT + + log_pass "'zfs get -d ' should get expected output." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib index 8ef8d9aa160d..48b3268f7813 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib @@ -37,7 +37,7 @@ set -A depth_options "d 0" "d 1" "d 2" "d 4" "d 32" set -A depth_array 0 1 2 4 32 # -# Setup multiple depths datasets, including fs, volume and snapshot. +# Setup multiple depths datasets, including fs, volumes, snapshots and bookmarks. # function depth_fs_setup { @@ -65,6 +65,7 @@ function depth_fs_setup log_must zfs create -V 8M $fs/vol_"$j"_depth"$i" fi log_must zfs snapshot $fs@snap_"$j"_depth"$i" + log_must zfs bookmark $fs@snap_"$j"_depth"$i" '#bookmark'_"$j"_depth"$i" (( j=j+1 )) done done diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh index 584039f543c6..3ef65b517c6d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh @@ -36,8 +36,8 @@ # 'zfs inherit' should return an error with bad parameters in one command. # # STRATEGY: -# 1. Set an array of bad options and invlid properties to 'zfs inherit' -# 2. Execute 'zfs inherit' with bad options and passing invlid properties +# 1. Set an array of bad options and invalid properties to 'zfs inherit' +# 2. Execute 'zfs inherit' with bad options and passing invalid properties # 3. Verify an error is returned. 
# diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh index bc0d8c59c0cd..3317b09e2b5b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh @@ -37,8 +37,8 @@ # 'zfs inherit' should return an error with bad parameters in one command. # # STRATEGY: -# 1. Set an array of bad options and invlid properties to 'zfs inherit' -# 2. Execute 'zfs inherit' with bad options and passing invlid properties +# 1. Set an array of bad options and invalid properties to 'zfs inherit' +# 2. Execute 'zfs inherit' with bad options and passing invalid properties # 3. Verify an error is returned. # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am index b2de98934b74..8a137b8303c0 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am @@ -19,6 +19,7 @@ dist_pkgdata_SCRIPTS = \ zfs_mount_all_mountpoints.ksh \ zfs_mount_encrypted.ksh \ zfs_mount_remount.ksh \ + zfs_mount_test_race.ksh \ zfs_multi_mount.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh index e2ef0bf00db0..52ae1879d1ae 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh @@ -45,7 +45,7 @@ # setuid setuid/nosetuid # # STRATEGY: -# 1. Create filesystem and get origianl property value. +# 1. Create filesystem and get original property value. # 2. Using 'zfs mount -o' to set filesystem property. # 3. Verify the property was set temporarily. # 4. Verify it will not affect the property that is stored on disk. diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh index 5f88b611002a..84835a0d6d62 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh @@ -83,7 +83,7 @@ log_must mkfile 1M $mntpnt/$TESTFILE2 log_mustnot ls $testfile log_must ls $mntpnt/$TESTFILE1 $mntpnt/$TESTFILE2 -# Verify $TESTFILE2 was created in $fs1, rather then $fs +# Verify $TESTFILE2 was created in $fs1, rather than $fs log_must zfs unmount $fs1 log_must zfs set mountpoint=$mntpnt1 $fs1 log_must zfs mount $fs1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh index 963ad626c2d0..0b5d61f62f40 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh @@ -72,4 +72,4 @@ else fi cd $curpath -log_pass "zfs mount fails with mounted filesystem or busy moutpoint as expected." +log_pass "zfs mount fails with mounted filesystem or busy mountpoint as expected." 
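The zfs_get_009_pos additions earlier encode two 'zfs get' behaviors: '-t snap' is accepted as shorthand for '-t snapshot', and '-t bookmark' against a filesystem behaves as though -d 1 were given. A brief sketch, assuming a hypothetical dataset tank/fs:

# Create one snapshot and one bookmark derived from it.
zfs snapshot tank/fs@s1
zfs bookmark tank/fs@s1 'tank/fs#b1'
# The next two commands should produce identical rows.
zfs get -H -t snap -d 1 -o name creation tank/fs
zfs get -H -t snapshot -d 1 -o name creation tank/fs
# Lists tank/fs#b1 without recursing into descendant datasets.
zfs get -H -t bookmark -o name creation tank/fs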
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh index f7a0978352b5..66a4338655de 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh @@ -29,7 +29,7 @@ # # DESCRIPTION: -# Verify remount functionality, expecially on readonly objects. +# Verify remount functionality, especially on readonly objects. # # STRATEGY: # 1. Prepare a filesystem and a snapshot diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh new file mode 100755 index 000000000000..404770b2727f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh @@ -0,0 +1,116 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.cfg + +# +# DESCRIPTION: +# Verify parallel mount ordering is consistent. +# +# There was a bug in initial thread dispatching algorithm which put threads +# under race condition which resulted in undefined mount order. The purpose +# of this test is to verify `zfs unmount -a` succeeds (not `zfs mount -a` +# succeeds, it always does) after `zfs mount -a`, which could fail if threads +# race. See github.com/zfsonlinux/zfs/issues/{8450,8833,8878} for details. +# +# STRATEGY: +# 1. Create pools and filesystems. +# 2. Set same mount point for >1 datasets. +# 3. Unmount all datasets. +# 4. Mount all datasets. +# 5. Unmount all datasets (verify this succeeds). +# + +verify_runnable "both" + +TMPDIR=${TMPDIR:-$TEST_BASE_DIR} +MNTPT=$TMPDIR/zfs_mount_test_race_mntpt +DISK1="$TMPDIR/zfs_mount_test_race_disk1" +DISK2="$TMPDIR/zfs_mount_test_race_disk2" + +TESTPOOL1=zfs_mount_test_race_tp1 +TESTPOOL2=zfs_mount_test_race_tp2 + +export __ZFS_POOL_RESTRICT="$TESTPOOL1 $TESTPOOL2" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +function cleanup +{ + zpool destroy $TESTPOOL1 + zpool destroy $TESTPOOL2 + rm -rf $MNTPT + rm -rf /$TESTPOOL1 + rm -rf /$TESTPOOL2 + rm -f $DISK1 + rm -f $DISK2 + export __ZFS_POOL_RESTRICT="$TESTPOOL1 $TESTPOOL2" + log_must zfs $mountall + unset __ZFS_POOL_RESTRICT +} +log_onexit cleanup + +log_note "Verify parallel mount ordering is consistent" + +log_must truncate -s $MINVDEVSIZE $DISK1 +log_must truncate -s $MINVDEVSIZE $DISK2 + +log_must zpool create -f $TESTPOOL1 $DISK1 +log_must zpool create -f $TESTPOOL2 $DISK2 + +log_must zfs create $TESTPOOL1/$TESTFS1 +log_must zfs create $TESTPOOL2/$TESTFS2 + +log_must zfs set mountpoint=none $TESTPOOL1 +log_must zfs set mountpoint=$MNTPT $TESTPOOL1/$TESTFS1 + +# Note that unmount can fail (due to race condition on `zfs mount -a`) with or +# without `canmount=off`. The race has nothing to do with canmount property, +# but turn it off for convenience of mount layout used in this test case. 
+log_must zfs set canmount=off $TESTPOOL2 +log_must zfs set mountpoint=$MNTPT $TESTPOOL2 + +# At this point, layout of datasets in two pools will look like below. +# Previously, on next `zfs mount -a`, pthreads assigned to TESTFS1 and TESTFS2 +# could race, and TESTFS2 usually (actually always) won in ZoL. Note that the +# problem is how two or more threads could initially be assigned to the same +# top level directory, not this specific layout. This layout is just an example +# that can reproduce race, and is also the layout reported in #8833. +# +# NAME MOUNTED MOUNTPOINT +# ---------------------------------------------- +# /$TESTPOOL1 no none +# /$TESTPOOL1/$TESTFS1 yes $MNTPT +# /$TESTPOOL2 no $MNTPT +# /$TESTPOOL2/$TESTFS2 yes $MNTPT/$TESTFS2 + +# Apparently two datasets must be mounted. +log_must ismounted $TESTPOOL1/$TESTFS1 +log_must ismounted $TESTPOOL2/$TESTFS2 +# This unmount always succeeds, because potential race hasn't happened yet. +log_must zfs unmount -a +# This mount always succeeds, whether threads are under race condition or not. +log_must zfs mount -a + +# Verify datasets are mounted (TESTFS2 fails if the race broke mount order). +log_must ismounted $TESTPOOL1/$TESTFS1 +log_must ismounted $TESTPOOL2/$TESTFS2 +# Verify unmount succeeds (fails if the race broke mount order). +log_must zfs unmount -a + +log_pass "Verify parallel mount ordering is consistent passed" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh index 1d769096b4fb..3788543b0b2f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh @@ -91,14 +91,28 @@ typeset -a pos_cmds_out=( } } }") + +# +# N.B. json.tool is needed to guarantee consistent ordering of fields, +# sed is needed to trim trailing space in CentOS 6's json.tool output +# +# As of Python 3.5 the behavior of json.tool changed to keep the order +# the same as the input and the --sort-keys option was added. Detect when +# --sort-keys is supported and apply the option to ensure the expected order. 
+#
+if python -m json.tool --sort-keys <<< "{}"; then
+	JSON_TOOL_CMD="python -m json.tool --sort-keys"
+else
+	JSON_TOOL_CMD="python -m json.tool"
+fi
+
 typeset -i cnt=0
 typeset cmd
 for cmd in ${pos_cmds[@]}; do
 	log_must zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1
-	log_must zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1
-	# json.tool is needed to guarantee consistent ordering of fields
-	# sed is needed to trim trailing space in CentOS 6's json.tool output
-	OUTPUT=$(zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1 | python -m json.tool | sed 's/[[:space:]]*$//')
+	log_must zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1
+	OUTPUT=$(zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 |
+	    $JSON_TOOL_CMD | sed 's/[[:space:]]*$//')
 	if [ "$OUTPUT" != "${pos_cmds_out[$cnt]}" ]; then
 		log_note "Got :$OUTPUT"
 		log_note "Expected:${pos_cmds_out[$cnt]}"
@@ -120,9 +134,9 @@ For the property list, run: zfs set|get
 For the delegated permission list, run: zfs allow|unallow")
 cnt=0
 for cmd in ${neg_cmds[@]}; do
-	log_mustnot zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1
-	log_mustnot zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1
-	OUTPUT=$(zfs program $TESTPOOL -j $TESTZCP $TESTDS $cmd 2>&1)
+	log_mustnot zfs program $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1
+	log_mustnot zfs program -j $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1
+	OUTPUT=$(zfs program -j $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1)
 	if [ "$OUTPUT" != "${neg_cmds_out[$cnt]}" ]; then
 		log_note "Got :$OUTPUT"
 		log_note "Expected:${neg_cmds_out[$cnt]}"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
index 336c7b2538bc..2c7584d3541d 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
@@ -29,11 +29,12 @@
 #
 # DESCRIPTION:
 # 1. Create an encrypted dataset
 # 2. Clone the encryption root
 # 3. Clone the clone
-# 4. Verify the encryption root of all three datasets is the origin
-# 5. Promote the clone of the clone
-# 6. Verify the encryption root of all three datasets is still the origin
-# 7. Promote the clone of the original encryption root
-# 8. Verify the encryption root of all three datasets is the promoted dataset
+# 4. Add children to each of these three datasets
+# 5. Verify the encryption root of all datasets is the origin
+# 6. Promote the clone of the clone
+# 7. Verify the encryption root of all datasets is still the origin
+# 8. Promote the dataset again, so it is now the encryption root
+# 9. Verify the encryption root of all datasets is the promoted dataset
 #
 
 verify_runnable "both"
@@ -62,19 +63,31 @@ log_must zfs snap $snaproot
 log_must zfs clone $snaproot $TESTPOOL/clone1
 log_must zfs snap $snapclone
 log_must zfs clone $snapclone $TESTPOOL/clone2
+log_must zfs create $TESTPOOL/$TESTFS1/child0
+log_must zfs create $TESTPOOL/clone1/child1
+log_must zfs create $TESTPOOL/clone2/child2
 
 log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1
 log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1
 log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child0 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone1/child1 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone2/child2 $TESTPOOL/$TESTFS1
 
 log_must zfs promote $TESTPOOL/clone2
 log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1
 log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1
 log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child0 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone1/child1 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone2/child2 $TESTPOOL/$TESTFS1
 
 log_must zfs promote $TESTPOOL/clone2
 log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/clone2
 log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/clone2
 log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child0 $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/clone1/child1 $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/clone2/child2 $TESTPOOL/clone2
 
 log_pass "ZFS promotes clones of an encryption root"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
index bf94274ddbf8..cbbacace1ec6 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
@@ -11,13 +11,13 @@
 #
 
 #
-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 #
 
 #
 # DESCRIPTION
 # Verify that "zfs list" gives correct values for written and written@
-# proerties for the dataset when different operations are on done on it
+# properties for the dataset when different operations are done on it
 #
 
 #
 # STRATEGY
@@ -86,7 +86,7 @@ blocks=0
 for i in 1 2 3; do
 	written=$(get_prop written $TESTPOOL/$TESTFS1@snap$i)
 	if [[ $blocks -eq 0 ]]; then
-		# Written value for the frist non-clone snapshot is
+		# Written value for the first non-clone snapshot is
 		# expected to be equal to the referenced value.
expected_written=$( \ get_prop referenced $TESTPOOL/$TESTFS1@snap$i) @@ -120,7 +120,7 @@ sync_pool written=$(get_prop written $TESTPOOL/$TESTFS1) writtenat3=$(get_prop written@snap3 $TESTPOOL/$TESTFS1) [[ $written -eq $writtenat3 ]] || \ - log_fail "Written and written@ dont match $written $writtenat3" + log_fail "Written and written@ don't match $written $writtenat3" within_percent $written $before_written 0.1 && \ log_fail "Unexpected written value after delete $written $before_written" writtenat=$(get_prop written@snap1 $TESTPOOL/$TESTFS1) @@ -216,15 +216,15 @@ for ds in $datasets; do count=$blocks sync_pool done -recursive_output=$(zfs get -r written@current $TESTPOOL | \ +recursive_output=$(zfs get -p -r written@current $TESTPOOL | \ grep -v $TESTFS1@ | grep -v $TESTFS2@ | grep -v $TESTFS3@ | \ grep -v "VALUE" | grep -v "-") -expected="20.0M" +expected="$((20 * mb_block))" for ds in $datasets; do writtenat=$(echo "$recursive_output" | grep -v $ds/) writtenat=$(echo "$writtenat" | grep $ds | awk '{print $3}') - [[ $writtenat == $expected ]] || \ - log_fail "recursive written property output mismatch" + within_percent $writtenat $expected 99.5 || \ + log_fail "Unexpected written@ value on $ds" done log_pass "zfs written and written@ property fields print correct values" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh index 5ce0e02fa617..f8439dcbbebd 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh @@ -155,7 +155,7 @@ for orig_fs in $datasets ; do log_must zfs destroy -Rf $rst_fs - log_note "Verfiying 'zfs receive -d ' works." + log_note "Verifying 'zfs receive -d ' works." i=0 while (( i < ${#bkup[*]} )); do diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh index fcbdc5e1594e..3a9c2279a61d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh @@ -36,7 +36,7 @@ # Verify 'zfs receive' fails with malformed parameters. # # STRATEGY: -# 1. Denfine malformed parameters array +# 1. Define malformed parameters array # 2. Feed the malformed parameters to 'zfs receive' # 3. Verify the command should be failed # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh index 5eee9eecf4bb..a1d094bdb4ba 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh @@ -31,9 +31,9 @@ # 4. Snapshot the encrypted dataset # 5. Attempt to receive the snapshot into an unencrypted child # 6. Verify encryption is not enabled -# 7. Verify the cheksum of the file is the same as the original +# 7. Verify the checksum of the file is the same as the original # 8. Attempt to receive the snapshot into an encrypted child -# 9. Verify the cheksum of the file is the same as the original +# 9. 
Verify the checksum of the file is the same as the original # verify_runnable "both" @@ -59,7 +59,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ "-o keyformat=passphrase $TESTPOOL/$TESTFS2" log_must mkfile 1M /$TESTPOOL/$TESTFS2/$TESTFILE0 -typeset checksum=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +typeset checksum=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) log_must zfs snapshot $snap @@ -69,14 +69,14 @@ log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" crypt=$(get_prop encryption $TESTPOOL/$TESTFS1/c1) [[ "$crypt" == "off" ]] || log_fail "Received unencrypted stream as encrypted" -typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | awk '{ print $1 }') +typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" log_note "Verify ZFS can receive into an encrypted child" log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS2/c1" -typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS2/c1/$TESTFILE0 | awk '{ print $1 }') +typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS2/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh index 2042b37a98f7..9740caf72508 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -31,11 +31,12 @@ # 4. Attempt to receive a raw send stream as a child of an unencrypted dataset # 5. Verify the key is unavailable # 6. Attempt to load the key and mount the dataset -# 7. Verify the cheksum of the file is the same as the original +# 7. Verify the checksum of the file is the same as the original # 8. Attempt to receive a raw send stream as a child of an encrypted dataset # 9. Verify the key is unavailable # 10. Attempt to load the key and mount the dataset -# 11. Verify the cheksum of the file is the same as the original +# 11. Verify the checksum of the file is the same as the original +# 12. 
Verify 'zfs receive -n' works with the raw stream # verify_runnable "both" @@ -60,8 +61,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ "-o keyformat=passphrase $TESTPOOL/$TESTFS1" log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 -typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | \ - awk '{ print $1 }') +typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap @@ -74,7 +74,7 @@ keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS2) log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" @@ -85,9 +85,10 @@ keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS1/c1) log_fail "Expected keystatus unavailable, got $keystatus" log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/c1" -typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | \ - awk '{ print $1 }') +typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" +log_must eval "zfs send -w $snap | zfs receive -n $TESTPOOL/$TESTFS3" + log_pass "ZFS can receive streams from raw sends" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh index 1e91c6262c48..c52a12e78ac3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh @@ -69,7 +69,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ log_must zfs snapshot $snap1 log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 -typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | awk '{ print $1 }') +typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap2 @@ -89,7 +89,7 @@ log_must zfs unload-key $TESTPOOL/$TESTFS2 log_must eval "zfs receive $TESTPOOL/$TESTFS2 < $ibackup" log_must eval "echo $passphrase2 | zfs mount -l $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh index 57896c6fd305..f8e53f02c23d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh @@ -46,7 +46,7 @@ function cleanup log_onexit cleanup -log_assert "ZFS should receive to an encrypted child dataset" +log_assert "ZFS should receive encrypted filesystems into child dataset" typeset passphrase="password" typeset snap="$TESTPOOL/$TESTFS@snap" @@ -60,11 +60,13 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ log_note "Verifying ZFS will receive to an encrypted child" log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" -log_note "Verifying 'send -p' will not receive to an encrypted child" -log_mustnot eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" +log_note "Verifying 'send -p' will 
receive to an encrypted child" +log_must eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c2)" == "off" -log_note "Verifying 'send -R' will not receive to an encrypted child" -log_mustnot eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" +log_note "Verifying 'send -R' will receive to an encrypted child" +log_must eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c3)" == "off" log_note "Verifying ZFS will not receive to an encrypted child when the" \ "parent key is unloaded" @@ -72,4 +74,4 @@ log_must zfs unmount $TESTPOOL/$TESTFS1 log_must zfs unload-key $TESTPOOL/$TESTFS1 log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4" -log_pass "ZFS can receive to an encrypted child dataset" +log_pass "ZFS can receive encrypted filesystems into child dataset" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh index 400592aaca2c..1b9c6e3c704f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh @@ -23,12 +23,13 @@ # # DESCRIPTION: -# 'zfs rename' should not rename an unencrypted dataset to a child +# 'zfs rename' should be able to move an unencrypted dataset to a child # of an encrypted dataset # # STRATEGY: # 1. Create an encrypted dataset -# 2. Attempt to rename the default dataset to a child of the encrypted dataset +# 2. Rename the default dataset to a child of the encrypted dataset +# 3. Confirm the child dataset doesn't have any encryption properties # verify_runnable "both" @@ -36,16 +37,17 @@ verify_runnable "both" function cleanup { datasetexists $TESTPOOL/$TESTFS2 && \ - log_must zfs destroy $TESTPOOL/$TESTFS2 + log_must zfs destroy -r $TESTPOOL/$TESTFS2 } log_onexit cleanup -log_assert "'zfs rename' should not rename an unencrypted dataset to a" \ +log_assert "'zfs rename' should allow renaming an unencrypted dataset to a" \ "child of an encrypted dataset" log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" -log_mustnot zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS +log_must zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS2/$TESTFS)" == "off" -log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \ +log_pass "'zfs rename' allows renaming an unencrypted dataset to a child" \ "of an encrypted dataset" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib index 5b157d11c15f..f69ec300ca98 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib @@ -147,7 +147,7 @@ function setup_clone_env } # -# Clean up the test environmnet +# Clean up the test environment # # $1 number of snapshot Note: Currently only support three snapshots. 
# diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh index b0a319d41930..2c6e3fdd6d2f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh @@ -75,7 +75,7 @@ log_onexit cleanup init_snap=$TESTPOOL/$TESTFS@init_snap inc_snap=$TESTPOOL/$TESTFS@inc_snap full_bkup=$TEST_BASE_DIR/fullbkup.$$ -inc_bkup=/var/tmp/incbkup.$$ +inc_bkup=$TEST_BASE_DIR/incbkup.$$ init_data=$TESTDIR/$TESTFILE1 inc_data=$TESTDIR/$TESTFILE2 orig_sum="" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh index da14fa2fa62c..4a9d29fce1cf 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh @@ -65,7 +65,7 @@ snap2=$fs@snap2 snap3=$fs@snap3 set -A badargs \ - "" "$TESTPOOL" "$TESTFS" "$fs" "$fs@nonexisten_snap" "?" \ + "" "$TESTPOOL" "$TESTFS" "$fs" "$fs@nonexistent_snap" "?" \ "$snap1/blah" "$snap1@blah" "-i" "-x" "-i $fs" \ "-x $snap1 $snap2" "-i $snap1" \ "-i $snap2 $snap1" "$snap1 $snap2" "-i $snap1 $snap2 $snap3" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh index 7192551b6c5d..652f7b738f0e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh @@ -54,7 +54,7 @@ function get_estimate_size typeset snapshot=$1 typeset option=$2 typeset base_snapshot=${3:-""} - if [[ -z $3 ]];then + if [[ -z $3 ]]; then typeset total_size=$(zfs send $option $snapshot 2>&1 | tail -1) else typeset total_size=$(zfs send $option $base_snapshot $snapshot \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh index 5fbc8bf71657..caad211bcf65 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg.ksh @@ -64,4 +64,4 @@ do done done -log_pass "Setting invalid {primary|secondary}cache on fs or volume fail as expeced." +log_pass "Setting invalid {primary|secondary}cache on fs or volume fail as expected." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh index 7cbcf7903e33..3b8b88e3631e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh @@ -40,7 +40,7 @@ # # STRATEGY: # 1. Setup a pool and create fs, volume, snapshot clone within it. -# 2. Set canmount=noauto for each dataset and check the retuen value +# 2. Set canmount=noauto for each dataset and check the return value # and check if it still can be mounted by mount -a. # 3. mount each dataset(except volume) to see if it can be mounted. 
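+#
+# For illustration (not part of the original script), canmount=noauto means:
+#   zfs set canmount=noauto $fs    # $fs is now skipped by 'zfs mount -a'
+#   zfs mount $fs                  # but an explicit mount still succeeds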
# diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh index ad33e18fbb24..48580cafdb31 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh @@ -34,7 +34,7 @@ # # DESCRIPTION: -# If ZFS is currently managing the file system but it is currently unmoutned, +# If ZFS is currently managing the file system but it is currently unmounted, # and the mountpoint property is changed, the file system remains unmounted. # # STRATEGY: diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib index 084a4a0a82ac..5e9f719dfcfe 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib @@ -156,7 +156,7 @@ function random_string } # -# Get vaild user defined property name +# Get valid user defined property name # # $1 user defined property name length # @@ -189,7 +189,7 @@ function valid_user_property } # -# Get invaild user defined property name +# Get invalid user defined property name # # $1 user defined property name length # diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh index 2efcf1cceb7e..5d8b6e2750f5 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh @@ -82,7 +82,7 @@ while (( i < ${#args[*]} )); do ((i = i + 1)) done -# Testing the invalid senario: the child volume already has an +# Testing the invalid scenario: the child volume already has an # identical name snapshot, zfs snapshot -r should fail when # creating snapshot with -r for the parent log_must zfs destroy $TESTPOOL/$TESTCTR/$TESTFS1@$TESTSNAP diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh index 377910013271..627910abd6ed 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_008_neg.ksh @@ -34,7 +34,7 @@ # STRATEGY: # 1. Create 2 separate zpools, zpool name lengths must be the same. # 2. Attempt to simultaneously create a snapshot of each pool. -# 3. Veriy the snapshot creation failed. +# 3. Verify the snapshot creation failed. # verify_runnable "both" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh index 4cd98af0c69d..f0682b816ae8 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh @@ -22,7 +22,7 @@ # 1. Create multiple datasets # 2. Create multiple snapshots with a list of valid and invalid # snapshot names -# 3. Verify the valid snpashot creation +# 3. Verify the valid snapshot creation . 
$STF_SUITE/include/libtest.shlib @@ -86,7 +86,7 @@ for i in 1 2 3; do txg_tag=$(echo "$txg_group" | nawk -v j=$i 'FNR == j {print}') [[ $txg_tag != $(echo "$txg_group" | \ nawk -v j=$i 'FNR == j {print}') ]] \ - && log_fail "snapshots belong to differnt transaction groups" + && log_fail "snapshots belong to different transaction groups" done log_note "verify snapshot contents" for ds in $datasets; do diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh index e83e8d5165eb..afec9d896294 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh @@ -95,15 +95,14 @@ for arg in ${badargs[@]}; do log_mustnot eval "zfs unmount $arg $fs >/dev/null 2>&1" done - -#Testing invalid datasets +# Testing invalid datasets for ds in $snap $vol "blah"; do for opt in "" "-f"; do log_mustnot eval "zfs unmount $opt $ds >/dev/null 2>&1" done done -#Testing invalid mountpoint +# Testing invalid mountpoint dir=foodir.$$ file=foo.$$ fs1=$TESTPOOL/fs.$$ @@ -119,20 +118,20 @@ for mpt in "./$dir" "./$file" "/tmp"; do done cd $curpath -#Testing null argument and too many arguments +# Testing null argument and too many arguments for opt in "" "-f"; do log_mustnot eval "zfs unmount $opt >/dev/null 2>&1" log_mustnot eval "zfs unmount $opt $fs $fs1 >/dev/null 2>&1" done -#Testing already unmounted filesystem +# Testing already unmounted filesystem log_must zfs unmount $fs1 for opt in "" "-f"; do log_mustnot eval "zfs unmount $opt $fs1 >/dev/null 2>&1" log_mustnot eval "zfs unmount /tmp/$dir >/dev/null 2>&1" done -#Testing legacy mounted filesystem +# Testing legacy mounted filesystem log_must zfs set mountpoint=legacy $fs1 if is_linux; then log_must mount -t zfs $fs1 /tmp/$dir diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh index 0ed14a99fc27..3575875c2767 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh @@ -83,7 +83,7 @@ function restore_dataset } -log_assert "zfs fource unmount and destroy in snapshot directory will not cause error." +log_assert "zfs force unmount and destroy in snapshot directory will not cause error." log_onexit cleanup for fs in $TESTPOOL/$TESTFS $TESTPOOL ; do @@ -139,4 +139,4 @@ log_must eval zpool list > /dev/null 2>&1 log_must eval zpool status > /dev/null 2>&1 zpool iostat > /dev/null 2>&1 -log_pass "zfs fource unmount and destroy in snapshot directory will not cause error." +log_pass "zfs force unmount and destroy in snapshot directory will not cause error." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh index 7bb1cd4a37ca..ca625bd2278a 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh @@ -140,7 +140,7 @@ while (( i < ${#mntp_fs[*]} )); do ((i = i + 2)) done -log_note "Verify 'zfs unshare -a' succeds as root." +log_note "Verify 'zfs unshare -a' succeeds as root." 
i=0 typeset sharenfs_val diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh index e92581c7c9bf..fd916040b1bc 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_004_neg.ksh @@ -46,7 +46,7 @@ verify_runnable "global" export NONEXISTFSNAME="nonexistfs50charslong_0123456789012345678901234567" export NONEXISTMOUNTPOINT="/nonexistmountpoint_0123456789" -set -A opts "" "$TESTPOOL/$NONEXISTFSNAME" "$NONEEXISTMOUNTPOINT" "-?" "-1" \ +set -A opts "" "$TESTPOOL/$NONEXISTFSNAME" "$NONEXISTMOUNTPOINT" "-?" "-1" \ "-a blah" "$TESTPOOL/$TESTFS $TESTPOOL/$TESTFS1" \ "-f $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS1" \ "$TESTPOOL/$TESTFS $TESTDIR" "-f $TESTPOOL/$TESTFS $TESTDIR" \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh index e37b4f81abf4..d3ed4a736cc9 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh @@ -133,7 +133,7 @@ COUNT=$( wc -l $output | awk '{print $1}' ) if (( COUNT != OLDCOUNT )); then cat $output - log_fail "Unexpect old-version filesystems print out." + log_fail "Unexpected old-version filesystems print out." fi log_pass "Executing 'zfs upgrade' command succeeds." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh index a3158bd57819..25decd78863b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_001_neg.ksh @@ -37,7 +37,7 @@ # return an error. # # STRATEGY: -# 1. Create an array containg each zpool sub-command name. +# 1. Create an array containing each zpool sub-command name. # 2. For each element, execute the sub-command. # 3. Verify it returns an error. 
# diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib index f80a2a864e43..94615ee3a0b5 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib @@ -90,7 +90,7 @@ function find_mnttab_dev } # -# Save the systme current dump device configuration +# Save the system current dump device configuration # function save_dump_dev { diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib index 9e6874832066..31244f4ecb8b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib @@ -45,7 +45,7 @@ function create_pool_test typeset vdevs eval "typeset -a diskarray=($3)" - for vdevs in "${diskarray[@]}";do + for vdevs in "${diskarray[@]}"; do create_pool $pool $keywd $vdevs log_must poolexists $pool destroy_pool $pool @@ -146,7 +146,7 @@ function find_vfstab_dev } # -# Save the systme current dump device configuration +# Save the system current dump device configuration # function save_dump_dev { diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh index 2afbec37dca9..de5e9d8e79c3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh @@ -95,7 +95,7 @@ do log_must zpool create $opt $TESTPOOL ${pooltype[i]} \ $file.1 $file.2 $file.3 ! poolexists $TESTPOOL && \ - log_fail "Createing pool with $opt fails." + log_fail "Creating pool with $opt fails." mpt=`zfs mount | egrep "^$TESTPOOL[^/]" | awk '{print $2}'` (( ${#mpt} == 0 )) && \ log_fail "$TESTPOOL created with $opt is not mounted." @@ -105,12 +105,12 @@ do from the output of zfs mount" if [[ "$opt" == "-m $TESTDIR1" ]]; then [[ ! -d $TESTDIR1 ]] && \ - log_fail "$TESTDIR1 is not created auotmatically." + log_fail "$TESTDIR1 is not created automatically." [[ "$mpt" != "$TESTDIR1" ]] && \ log_fail "$TESTPOOL is not mounted on $TESTDIR1." elif [[ "$opt" == "-R $TESTDIR1" ]]; then [[ ! -d $TESTDIR1/$TESTPOOL ]] && \ - log_fail "$TESTDIR1/$TESTPOOL is not created auotmatically." + log_fail "$TESTDIR1/$TESTPOOL is not created automatically." [[ "$mpt" != "$TESTDIR1/$TESTPOOL" ]] && \ log_fail "$TESTPOOL is not mounted on $TESTDIR1/$TESTPOOL." else diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh index 3fca607b1f46..cbb5806d9af6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh @@ -41,7 +41,7 @@ # STRATEGY: # 1. delete all devices in the swap # 2. create a zpool -# 3. Verify the creation is successed. +# 3. 
Verify the creation was successful # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh index aa154d5c65cc..e521d8f1cff0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh @@ -45,6 +45,7 @@ # N 1 1 no keyformat given, but crypt off # Y 0 0 no no keyformat specified for new key # Y 0 1 no no keyformat specified for new key +# Y 1 1 no unsupported combination of non-encryption props # Y 1 0 yes new encryption root # Y 1 1 yes new encryption root # @@ -83,6 +84,10 @@ log_mustnot zpool create -O encryption=on $TESTPOOL $DISKS log_mustnot zpool create -O encryption=on -O keylocation=prompt \ $TESTPOOL $DISKS +log_mustnot eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt" \ + "-o feature@lz4_compress=disabled -O compression=lz4 $TESTPOOL $DISKS" + log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ "-O keyformat=passphrase $TESTPOOL $DISKS" log_must zpool destroy $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh index ab862354b810..67038a4743d8 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh @@ -34,7 +34,7 @@ log_assert "'zpool events -c' should successfully clear events." # 1. Clear all ZFS events # This is needed because we may already over the max number or events queued # (zfs_zevent_len_max) generated by previous tests: generating $EVENTS_NUM new -# events and then counting them is racy and leads to failues, so start from 0. +# events and then counting them is racy and leads to failures, so start from 0. log_must zpool events -c # 2. Generate some new ZFS events diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am index 36a7f23126a4..0c87c9b37763 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile.am @@ -5,7 +5,8 @@ dist_pkgdata_SCRIPTS = \ zpool_get_001_pos.ksh \ zpool_get_002_pos.ksh \ zpool_get_003_pos.ksh \ - zpool_get_004_neg.ksh + zpool_get_004_neg.ksh \ + zpool_get_005_pos.ksh dist_pkgdata_DATA = \ - zpool_get.cfg + zpool_get.cfg zpool_get_parsable.cfg diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh new file mode 100755 index 000000000000..ad27d180fdb1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_005_pos.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2014 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg + +# +# DESCRIPTION: +# +# Zpool get returns parsable values for all known parsable properties +# +# STRATEGY: +# 1. For all parsable properties, verify zpool get -p returns a parsable value +# + +if ! is_global_zone ; then + TESTPOOL=${TESTPOOL%%/*} +fi + +typeset -i i=0 + +while [[ $i -lt "${#properties[@]}" ]]; do + log_note "Checking for parsable ${properties[$i]} property" + log_must eval "zpool get -p ${properties[$i]} $TESTPOOL >/tmp/value.$$" + grep "${properties[$i]}" /tmp/value.$$ >/dev/null 2>&1 + if [[ $? -ne 0 ]]; then + log_fail "${properties[$i]} not seen in output" + fi + + typeset v=$(grep "${properties[$i]}" /tmp/value.$$ | awk '{print $3}') + + log_note "${properties[$i]} has a value of $v" + + # Determine if this value is a valid number, result in return code + log_must test -n "$v" + expr $v + 0 >/dev/null 2>&1 + + # All properties must be positive integers in order to be + # parsable (i.e. a return code of 0 or 1 from expr above). + # The only exception is "expandsize", which may be "-". + if [[ ! ($? -eq 0 || $? -eq 1 || \ + ("${properties[$i]}" = "expandsize" && "$v" = "-")) ]]; then + log_fail "${properties[$i]} is not parsable" + fi + + i=$(( $i + 1 )) +done + +rm /tmp/value.$$ +log_pass "Zpool get returns parsable values for all known parsable properties" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg new file mode 100644 index 000000000000..e7b95a47223b --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get_parsable.cfg @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2014 by Delphix. All rights reserved. 
+# + +# Set the expected properties of zpool +typeset -a properties=("allocated" "capacity" "expandsize" "free" "freeing" + "leaked" "size") diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh index dd1be14a066b..a2b73182bf4c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh @@ -38,7 +38,7 @@ # # STRATEGY: # 1. Create pool, volume & snap -# 2. Verify 'zpool history' can cope with incorret arguments. +# 2. Verify 'zpool history' can cope with incorrect arguments. # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh index 23d79c69075e..887993dfd1ec 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh @@ -50,7 +50,7 @@ function dev_checksum log_note "Compute checksum of '$dev'" - checksum=$(md5sum $dev) + checksum=$(md5digest $dev) if [[ $? -ne 0 ]]; then log_fail "Failed to compute checksum of '$dev'" return 1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh index e8f3937609d1..f42ba10d65c4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh @@ -115,7 +115,7 @@ function test_common # further than the time that we took the checkpoint. # # Note that, ideally we would want to take a checkpoint - # right after we recond the txg we plan to rewind to. + # right after we record the txg we plan to rewind to. # But since we can't attach, detach or remove devices # while having a checkpoint, we take it after the # operation that changes the config. diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib index f53b88f794e4..c365ec4adb22 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib @@ -1,5 +1,3 @@ -#!/bin/ksh - # # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. @@ -81,10 +79,10 @@ function write_some_data # # Create/overwrite a few datasets with files. -# Apply md5sum on all the files and store checksums in a file. +# Checksum all the files and store digests in a file. # # newdata: overwrite existing files if false. 
-# md5file: file where to store md5sums +# md5file: file where to store md5 digests # datasetname: base name for datasets # function _generate_data_common @@ -104,7 +102,10 @@ function _generate_data_common for j in {1..$files}; do typeset file="/$pool/$datasetname$i/file$j" dd if=/dev/urandom of=$file bs=128k count=$blocks > /dev/null - [[ -n $md5file ]] && md5sum $file >> $md5file + if [[ -n $md5file ]]; then + typeset cksum=$(md5digest $file) + echo $cksum $file >> $md5file + fi done ( $newdata ) && sync_pool "$pool" done @@ -142,8 +143,15 @@ function verify_data_md5sums return 1 fi - md5sum -c --quiet $md5file - return $? + cat $md5file | \ + while read digest file; do + typeset digest1=$(md5digest $file) + if [[ "$digest1" != "$digest" ]]; then + return 1 + fi + done + + return 0 } # diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh index 7534ebca87fe..c6d2637074fe 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh @@ -54,7 +54,7 @@ # 3. Export the test pool. # 4. Move one or more device files to other directory # 5. Verify 'zpool import -d' with the new directory -# will handle moved files successfullly. +# will handle moved files successfully. # Using the various combinations. # - Regular import # - Alternate Root Specified diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am index a0a0e0b5cfa5..2ebc376d9cb9 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am @@ -3,7 +3,6 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ zpool_initialize_attach_detach_add_remove.ksh \ zpool_initialize_import_export.ksh \ - zpool_initialize.kshlib \ zpool_initialize_offline_export_import_online.ksh \ zpool_initialize_online_offline.ksh \ zpool_initialize_split.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh index dcca2e9335d6..b63d55d7ad64 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_active.ksh @@ -24,8 +24,8 @@ # STRATEGY: # 1. Create the pool with log device. # 2. Try clearing the label on data and log devices. -# 3. Add auxilary (cache/spare) vdevs. -# 4. Try clearing the label on auxilary vdevs. +# 3. Add auxiliary (cache/spare) vdevs. +# 4. Try clearing the label on auxiliary vdevs. # 5. Check that zpool labelclear will return non-zero and # labels are intact. diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh index a5131bdbb78b..72a555bebe07 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_exported.ksh @@ -26,8 +26,8 @@ # 2. Export the pool. # 3. Check that zpool labelclear returns non-zero when trying to # clear the label on ACTIVE vdevs, and succeeds with -f. 
-# 4. Add auxilary vdevs (cache/spare). -# 5. Check that zpool labelclear succeeds on auxilary vdevs of +# 4. Add auxiliary vdevs (cache/spare). +# 5. Check that zpool labelclear succeeds on auxiliary vdevs of # exported pool. verify_runnable "global" @@ -44,7 +44,7 @@ log_assert "zpool labelclear will fail on ACTIVE vdevs of exported pool and" \ for vdevtype in "" "cache" "spare"; do # Create simple pool, skip any mounts log_must zpool create -O mountpoint=none -f $TESTPOOL $disk1 log $disk2 - # Add auxilary vdevs (cache/spare) + # Add auxiliary vdevs (cache/spare) if [[ -n $vdevtype ]]; then log_must zpool add $TESTPOOL $vdevtype $disk3 fi @@ -63,7 +63,7 @@ for vdevtype in "" "cache" "spare"; do log_must zpool labelclear -f $disk2 log_mustnot zdb -lq $disk2 - # Check that labelclear on auxilary vdevs will succeed + # Check that labelclear on auxiliary vdevs will succeed if [[ -n $vdevtype ]]; then log_must zpool labelclear $disk3 log_mustnot zdb -lq $disk3 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib old mode 100755 new mode 100644 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh index 6ac748818461..097dd3c71d1c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh @@ -42,7 +42,6 @@ verify_runnable "global" function cleanup { log_must zinject -c all - rm -f $TESTFILE_MD5 2>/dev/null # bring back removed disk online for further tests insert_disk $REMOVED_DISK $scsi_host poolexists $TESTPOOL && destroy_pool $TESTPOOL @@ -64,9 +63,8 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "unavail" # 3. Write a test file to the pool and calculate its checksum. TESTFILE=/$TESTPOOL/data -TESTFILE_MD5=$(mktemp --tmpdir=/var/tmp) log_must generate_random_file /$TESTPOOL/data $LARGE_FILE_SIZE -log_must md5sum $TESTFILE > $TESTFILE_MD5 +TESTFILE_MD5=$(md5digest $TESTFILE) # 4. Execute scrub. # add delay to I/O requests for remaining disk in pool @@ -90,12 +88,13 @@ log_must is_scan_restarted $TESTPOOL # 8. Put another device offline and check if the test file checksum is correct. 
log_must zpool offline $TESTPOOL $DISK2 -log_must md5sum -c $TESTFILE_MD5 +CHECK_MD5=$(md5digest $TESTFILE) +[[ $CHECK_MD5 == $TESTFILE_MD5 ]] || \ + log_fail "Checksums differ ($CHECK_MD5 != $TESTFILE_MD5)" log_must zpool online $TESTPOOL $DISK2 sleep 1 # clean up -rm -f $TESTFILE_MD5 2>/dev/null log_must zpool destroy $TESTPOOL log_pass "Zpool reopen test successful" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am index aab4de0e7c89..beb59e3d066b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am @@ -3,8 +3,4 @@ dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ zpool_status_001_pos.ksh \ - zpool_status_002_pos.ksh \ - zpool_status_003_pos.ksh \ - zpool_status_-c_disable.ksh \ - zpool_status_-c_homedir.ksh \ - zpool_status_-c_searchpath.ksh + zpool_status_002_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am index c357eeffb336..d2d3b4ae88bb 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am @@ -2,7 +2,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_trim dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ - zpool_trim.kshlib \ zpool_trim_attach_detach_add_remove.ksh \ zpool_trim_import_export.ksh \ zpool_trim_multiple.ksh \ @@ -20,3 +19,6 @@ dist_pkgdata_SCRIPTS = \ zpool_trim_unsupported_vdevs.ksh \ zpool_trim_verify_checksums.ksh \ zpool_trim_verify_trimmed.ksh + +dist_pkgdata_DATA = \ + zpool_trim.kshlib diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh index adc1ba47fcc1..696c8c66cc1c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_007_pos.ksh @@ -42,7 +42,7 @@ # # STRATEGY: # 1. Import pools of all versions -# 2. Setup a test enviorment over the old pools. +# 2. Setup a test environment over the old pools. # 3. Verify the commands related to 'zfs upgrade' succeed as expected. 
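+#
+# For illustration, the kinds of 'zfs upgrade' invocations exercised include:
+#   zfs upgrade       # list file systems running older on-disk versions
+#   zfs upgrade -v    # display the supported file system versions
+#   zfs upgrade -a    # upgrade all file systems to the latest version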
# diff --git a/tests/zfs-tests/tests/functional/cli_user/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/Makefile.am index f1ff32e8d22d..119f8ee187f6 100644 --- a/tests/zfs-tests/tests/functional/cli_user/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_user/Makefile.am @@ -2,4 +2,5 @@ SUBDIRS = \ misc \ zfs_list \ zpool_iostat \ - zpool_list + zpool_list \ + zpool_status diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am index 29c03429091b..49138d927e06 100644 --- a/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am @@ -45,8 +45,7 @@ dist_pkgdata_SCRIPTS = \ zpool_upgrade_001_neg.ksh \ arcstat_001_pos.ksh \ arc_summary_001_pos.ksh \ - arc_summary_002_neg.ksh \ - dbufstat_001_pos.ksh + arc_summary_002_neg.ksh dist_pkgdata_DATA = \ misc.cfg diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh index bcf6a2296d57..fc0ebde10025 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/setup.ksh @@ -34,7 +34,7 @@ # This setup script is moderately complex, as it creates scenarios for all # of the tests included in this directory. Usually we'd want each test case -# to setup/teardown it's own configuration, but this would be time consuming +# to setup/teardown its own configuration, but this would be time consuming # given the nature of these tests. However, as a side-effect, one test # leaving the system in an unknown state could impact other test cases. diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh index 1073a40308b5..46171caf9fbc 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh @@ -44,16 +44,20 @@ function cleanup { - if [ -e $TEST_BASE_DIR/zfs_001_neg.$$.txt ] + if [ -e "$TEMPFILE" ] then - rm $TEST_BASE_DIR/zfs_001_neg.$$.txt + rm -f "$TEMPFILE" fi } log_onexit cleanup log_assert "zfs shows a usage message when run as a user" -eval "zfs > $TEST_BASE_DIR/zfs_001_neg.$$.txt 2>&1" -log_must grep "usage: zfs command args" $TEST_BASE_DIR/zfs_001_neg.$$.txt +TEMPFILE="$TEST_BASE_DIR/zfs_001_neg.$$.txt" + +eval "zfs > $TEMPFILE 2>&1" +log_must grep "usage: zfs command args" "$TEMPFILE" + +log_must eval "awk '{if (length(\$0) > 80) exit 1}' < $TEMPFILE" log_pass "zfs shows a usage message when run as a user" diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh index af924837ad8a..0fddc08b25db 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_001_neg.ksh @@ -45,16 +45,20 @@ function cleanup { - if [ -e $TEST_BASE_DIR/zpool_001_neg.$$.txt ] + if [ -e "$TEMPFILE" ] then - rm $TEST_BASE_DIR/zpool_001_neg.$$.txt + rm -f "$TEMPFILE" fi } +TEMPFILE="$TEST_BASE_DIR/zpool_001_neg.$$.txt" + log_onexit cleanup log_assert "zpool shows a usage message when run as a user" -eval "zpool > $TEST_BASE_DIR/zpool_001_neg.$$.txt 2>&1" -log_must grep "usage: zpool command args" $TEST_BASE_DIR/zpool_001_neg.$$.txt +eval "zpool > $TEMPFILE 2>&1" +log_must grep "usage: zpool command args" "$TEMPFILE" + +log_must eval "awk '{if (length(\$0) > 80) exit 1}' < 
$TEMPFILE" log_pass "zpool shows a usage message when run as a user" diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh index b89cf07ac183..cd290515357f 100755 --- a/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/misc/zpool_online_001_neg.ksh @@ -49,7 +49,7 @@ function check_for_online | grep ONLINE ) if [ -n "$RESULT" ] then - log_fail "A disk was brough online!" + log_fail "A disk was brought online!" fi } diff --git a/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh index d881b831ffbc..8e9009bd5500 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh @@ -57,9 +57,8 @@ function cleanup log_onexit cleanup log_assert "'zfs list -d ' should get expected output." -mntpnt=/var/tmp -DEPTH_OUTPUT="$mntpnt/depth_output" -EXPECT_OUTPUT="$mntpnt/expect_output" +DEPTH_OUTPUT="$TEST_BASE_DIR/depth_output" +EXPECT_OUTPUT="$TEST_BASE_DIR/expect_output" typeset -i old_val=0 typeset -i j=0 typeset -i fs=0 diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh index 1ae91c1a8434..53652ec11b5a 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_005_pos.ksh @@ -68,7 +68,7 @@ for i in $files ; do test_zpool_script "$i" "$testpool" "zpool iostat -Pv -c" done -# Test that we can run multiple scripts separated with a commma by running +# Test that we can run multiple scripts separated with a comma by running # all the scripts in a single -c line. allscripts="$(echo $scripts | sed -r 's/[[:blank:]]+/,/g')" test_zpool_script "$allscripts" "$testpool" "zpool iostat -Pv -c" diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am new file mode 100644 index 000000000000..e1b339657749 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_user/zpool_status +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zpool_status_003_pos.ksh \ + zpool_status_-c_disable.ksh \ + zpool_status_-c_homedir.ksh \ + zpool_status_-c_searchpath.ksh diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh new file mode 100755 index 000000000000..79cd6e9f908e --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh new file mode 100755 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_disable.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_disable.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_disable.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_disable.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_homedir.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_homedir.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_searchpath.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_searchpath.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh similarity index 96% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh index c5e0c6e474a5..fa7d3f3f2d56 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh @@ -68,7 +68,7 @@ for i in $files ; do test_zpool_script "$i" "$testpool" "zpool status -P -c" done -# Test that we can run multiple scripts separated with a commma by running +# Test that we can run multiple scripts separated with a comma by running # all the scripts in a single -c line. allscripts="$(echo $scripts | sed -r 's/[[:blank:]]+/,/g')" test_zpool_script "$allscripts" "$testpool" "zpool status -P -c" diff --git a/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh b/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh index c2c911020410..45fdb5b85692 100755 --- a/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh @@ -36,7 +36,7 @@ # zfs allow can deal with invalid arguments.(Invalid options or combination) # # STRATEGY: -# 1. Verify invalid argumets will cause error. +# 1. Verify invalid arguments will cause error. # 2. Verify non-optional argument was missing will cause error. # 3. Verify invalid options cause error. # diff --git a/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh b/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh index ac031ed6a52f..2f2802bc65a3 100755 --- a/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/devices/devices_001_pos.ksh @@ -42,7 +42,7 @@ # 1. Create pool and file system. # 2. Set devices=on on this file system. # 3. Separately create block device file and character file. -# 4. Separately read from those two device files. +# 4. Separately read and write from those two device files. # 5. Check the return value, and make sure it succeeds. 
# @@ -55,12 +55,18 @@ log_onexit cleanup log_must zfs set devices=on $TESTPOOL/$TESTFS # -# Separately create block device file and character device file, then try to -# open them and make sure it succeed. +# Create block device file backed by a ZFS volume. +# Verify it can be opened, written, and read. # -create_dev_file b $TESTDIR/$TESTFILE1 -log_must dd if=$TESTDIR/$TESTFILE1 of=$TESTDIR/$TESTFILE1.out count=1 +create_dev_file b $TESTDIR/$TESTFILE1 $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL +log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE1.out1 count=1 bs=128k +log_must dd if=$TESTDIR/$TESTFILE1.out1 of=$TESTDIR/$TESTFILE1 count=1 bs=128k +log_must dd if=$TESTDIR/$TESTFILE1 of=$TESTDIR/$TESTFILE1.out2 count=1 bs=128k +log_must cmp $TESTDIR/$TESTFILE1.out1 $TESTDIR/$TESTFILE1.out2 + +# Create character device file backed by /dev/null +# Verify it can be opened and written. create_dev_file c $TESTDIR/$TESTFILE2 -log_must dd if=$TESTDIR/$TESTFILE2 of=$TESTDIR/$TESTFILE2.out count=1 +log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE2 count=1 bs=128k log_pass "Setting devices=on on file system and testing it pass." diff --git a/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh b/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh index ce25502b818b..a768c4aa6b34 100755 --- a/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/devices/devices_002_neg.ksh @@ -42,7 +42,7 @@ # 1. Create pool and file system. # 2. Set devices=off on this file system. # 3. Separately create block device file and character file. -# 4. Separately read from those two device files. +# 4. Separately read and write from those two device files. # 5. Check the return value, and make sure it failed. # @@ -55,12 +55,16 @@ log_onexit cleanup log_must zfs set devices=off $TESTPOOL/$TESTFS # -# Separately create block device file and character device file, then try to -# open them and make sure it failed. +# Create block device file backed by a ZFS volume. +# Verify it cannot be opened, written, and read. # -create_dev_file b $TESTDIR/$TESTFILE1 -log_mustnot dd if=$TESTDIR/$TESTFILE1 of=$TESTDIR/$TESTFILE1.out count=1 +create_dev_file b $TESTDIR/$TESTFILE1 $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL +log_mustnot dd if=/dev/urandom of=$TESTDIR/$TESTFILE1 count=1 bs=128k +log_mustnot dd if=$TESTDIR/$TESTFILE1 of=/dev/null count=1 bs=128k + +# Create character device file backed by /dev/null +# Verify it cannot be opened and written. create_dev_file c $TESTDIR/$TESTFILE2 -log_mustnot dd if=$TESTDIR/$TESTFILE2 of=$TESTDIR/$TESTFILE2.out count=1 +log_mustnot dd if=/dev/urandom of=$TESTDIR/$TESTFILE2 count=1 bs=128k log_pass "Setting devices=off on file system and testing it pass." 
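The reworked create_dev_file helper in the devices_common.kshlib diff that follows derives major/minor numbers from an existing device node via stat(1) instead of scraping /proc/partitions. As a minimal standalone sketch of that technique on Linux (GNU stat assumed; the source path and target name here are illustrative):

    # Clone an existing block device node. GNU stat prints the major (%t)
    # and minor (%T) device numbers in hex, so mknod gets them 0x-prefixed.
    src=/dev/zvol/tank/vol
    major=$(stat --dereference --format='%t' "$src")
    minor=$(stat --dereference --format='%T' "$src")
    mknod /tmp/blkfile b "0x${major}" "0x${minor}"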
diff --git a/tests/zfs-tests/tests/functional/devices/devices_common.kshlib b/tests/zfs-tests/tests/functional/devices/devices_common.kshlib index 2c7df8d058c3..fa7fdbecf5fd 100644 --- a/tests/zfs-tests/tests/functional/devices/devices_common.kshlib +++ b/tests/zfs-tests/tests/functional/devices/devices_common.kshlib @@ -36,89 +36,74 @@ # # $1 device file type # $2 file name +# $3 device path (used for 'b' device type) # function create_dev_file { typeset filetype=$1 typeset filename=$2 + typeset devstr=$3 case $filetype in - b) - if is_linux; then - major=$(awk '/[hsv]d/ { print $1; exit }' \ - /proc/partitions) - minor=$(awk '/[hsv]d/ { print $2; exit }' \ - /proc/partitions) - log_must mknod $filename b $major $minor - return 0 - fi - - devtype=$(df -n / | awk '{print $3}') - case $devtype in - zfs) - rootpool=$(df / | \ - awk '{print $2}') - rootpool=${rootpool#\(} - rootpool=${rootpool%%/*} - - devstr=$(get_disklist $rootpool) - devstr=$(echo "$devstr" | \ - awk '{print $1}') - [[ -z $devstr ]] && \ - log_fail "Can not get block device file." - devstr=$DEV_DSKDIR/${devstr} - ;; - ufs) + b) + case $(uname) in + Linux) # - # Get the existing block device file in current system. - # And bring out the first one. + # stat(1) --format=FORMAT tokens + # %t - major device type in hex + # %T - minor device type in hex # - devstr=$(df-lhF ufs | \ - grep "^${DEV_DSKDIR}" | \ - awk '{print $1}') - devstr=$(echo "$devstr" | \ - awk '{print $1}') - [[ -z $devstr ]] && \ - log_fail "Can not get block device file." - ;; - *) - log_unsupported "Unsupported fstype " \ - "for / ($devtype)," \ - "only ufs|zfs is supported." - ;; - esac - + major=$(stat --dereference --format="%t" "$devstr") + minor=$(stat --dereference --format="%T" "$devstr") + log_must mknod $filename b "0x${major}" "0x${minor}" + ;; + *) # # Get the device file information. i.e: - # $DEV_DSKDIR/c0t0d0s0: block special (28/768) + # $devstr: block special (28/768) # devstr=$(file $devstr) - - # - # Bring out major and minor number. - # major=${devstr##*\(} major=${major%%/*} minor=${devstr##*/} minor=${minor%\)} - log_must mknod $filename b $major $minor ;; - c) + esac + ;; + c) + # + # Create device file '/dev/null', $devstr is unused. + # + case $(uname) in + Linux) + # + # stat(1) --format=FORMAT tokens + # %t - major device type in hex + # %T - minor device type in hex + # + major=$(stat --format="%t" /dev/null) + minor=$(stat --format="%T" /dev/null) + log_must mknod $filename c "0x${major}" "0x${minor}" + ;; + FreeBSD) # # Create device file '/dev/null' # - if is_linux; then - major=$(stat -c %t /dev/null) - minor=$(stat -c %T /dev/null) - log_must mknod $filename c $major $minor - else - log_must mknod $filename c $(getmajor mm) 2 - fi + major=13 + minor=2 + log_must mknod $filename c $major $minor ;; *) - log_fail "'$filetype' is wrong." + major=$(getmajor mm) + minor=2 + log_must mknod $filename c $major $minor ;;
+ ;; esac return 0 @@ -129,6 +114,6 @@ function cleanup log_must zfs set devices=on $TESTPOOL/$TESTFS log_must rm -f $TESTDIR/$TESTFILE1 log_must rm -f $TESTDIR/$TESTFILE2 - log_must rm -f $TESTDIR/$TESTFILE1.out - log_must rm -f $TESTDIR/$TESTFILE2.out + log_must rm -f $TESTDIR/$TESTFILE1.out1 + log_must rm -f $TESTDIR/$TESTFILE1.out2 } diff --git a/tests/zfs-tests/tests/functional/devices/setup.ksh b/tests/zfs-tests/tests/functional/devices/setup.ksh index fc5cec3063a6..ee6cf83acb9e 100755 --- a/tests/zfs-tests/tests/functional/devices/setup.ksh +++ b/tests/zfs-tests/tests/functional/devices/setup.ksh @@ -32,4 +32,4 @@ . $STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} -default_setup $DISK +default_volume_setup $DISK diff --git a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh index bc925bc91c81..03fc15a8a7cb 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh @@ -129,7 +129,7 @@ do typeset -i timeout=0 while true; do if ((timeout == $MAXTIMEOUT)); then - log_fail "Timeout occured" + log_fail "Timeout occurred" fi ((timeout++)) diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh index 8650ceff7d16..25c23aecc308 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh @@ -116,7 +116,7 @@ for type in "mirror" "raidz" "raidz2" "raidz3"; do done # Rinse and repeat, this time faulting both devices at the same time -# NOTE: "raidz" is exluded since it cannot survive 2 faulted devices +# NOTE: "raidz" is excluded since it cannot survive 2 faulted devices # NOTE: "mirror" is a 4-way mirror here and should survive this test for type in "mirror" "raidz2" "raidz3"; do # 1. 
Create a pool with two hot spares diff --git a/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh b/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh index a5b58ec8ff24..db4a4ad55ef1 100755 --- a/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh +++ b/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh @@ -42,6 +42,7 @@ function cleanup # Restore our zed.rc log_must zed_rc_restore $zedrc_backup default_cleanup_noexit + log_must zpool labelclear -f $DISK1 } log_onexit cleanup diff --git a/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh b/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh index b6a3e71fdfaf..c919ae608513 100755 --- a/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh +++ b/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh @@ -41,14 +41,14 @@ DISK=${DISKS%% *} verify_runnable "both" -log_must zpool create $TESTPOOL mirror ${DISKS} +default_mirror_setup_noexit $DISKS function cleanup { log_must zinject -c all log_must set_tunable64 zio_slow_io_ms $OLD_SLOW_IO log_must set_tunable64 zfs_slow_io_events_per_second $OLD_SLOW_IO_EVENTS - log_must destroy_pool $TESTPOOL + default_cleanup_noexit } log_onexit cleanup diff --git a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh index eac292cbe064..71e175171322 100755 --- a/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_008_pos.ksh @@ -39,7 +39,7 @@ verify_runnable "both" function cleanup { - datasetexists $TEST_FS && log_must zfs destroy $TEST_FS + datasetexists $TEST_FS && destroy_dataset $TEST_FS } function verify_dnode_packing @@ -71,6 +71,7 @@ for ((i=0; i < 100; i++)); do done log_must wait +sync_pool $TESTPOOL verify_dnode_packing diff --git a/tests/zfs-tests/tests/functional/history/history_001_pos.ksh b/tests/zfs-tests/tests/functional/history/history_001_pos.ksh index e22aaa33dbc6..f33265185d5c 100755 --- a/tests/zfs-tests/tests/functional/history/history_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_001_pos.ksh @@ -115,7 +115,7 @@ import_dir=$TEST_BASE_DIR/import_dir.$$ log_must mkdir $import_dir log_must cp $STF_SUITE/tests/functional/history/zfs-pool-v4.dat.Z $import_dir log_must uncompress $import_dir/zfs-pool-v4.dat.Z -upgrade_pool=$(zpool import -d $import_dir | grep "pool:" | awk '{print $2}') +upgrade_pool=$(zpool import -d $import_dir | awk '/pool:/ { print $2 }') log_must zpool import -d $import_dir $upgrade_pool run_and_verify -p "$upgrade_pool" "zpool upgrade $upgrade_pool" diff --git a/tests/zfs-tests/tests/functional/history/history_003_pos.ksh b/tests/zfs-tests/tests/functional/history/history_003_pos.ksh index 4ecee3ba0c54..46af53f8af90 100755 --- a/tests/zfs-tests/tests/functional/history/history_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_003_pos.ksh @@ -65,9 +65,7 @@ log_must zpool create $spool $VDEV0 log_must zfs create $spool/$sfs typeset -i orig_count=$(zpool history $spool | wc -l) -typeset orig_md5=$(zpool history $spool | head -2 | md5sum | \ - awk '{print $1}') - +typeset orig_md5=$(zpool history $spool | head -2 | md5digest) typeset -i i=0 while ((i < 300)); do zfs set compression=off $spool/$sfs @@ -82,7 +80,7 @@ done TMPFILE=$TEST_BASE_DIR/spool.$$ zpool history $spool >$TMPFILE typeset -i entry_count=$(wc -l $TMPFILE | awk '{print $1}') -typeset 
final_md5=$(head -2 $TMPFILE | md5sum | awk '{print $1}') +typeset final_md5=$(head -2 $TMPFILE | md5digest) grep 'zpool create' $TMPFILE >/dev/null 2>&1 || log_fail "'zpool create' was not found in pool history" diff --git a/tests/zfs-tests/tests/functional/history/history_005_neg.ksh b/tests/zfs-tests/tests/functional/history/history_005_neg.ksh index f6a81a4ac5f2..297a701cc567 100755 --- a/tests/zfs-tests/tests/functional/history/history_005_neg.ksh +++ b/tests/zfs-tests/tests/functional/history/history_005_neg.ksh @@ -42,9 +42,9 @@ # zpool iostat # # STRATEGY: -# 1. Create a test pool. +# 1. Create a test pool # 2. Separately invoke zpool list|status|iostat -# 3. Verify they was not recored in pool history. +# 3. Verify they were not recorded in pool history # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/history/history_006_neg.ksh b/tests/zfs-tests/tests/functional/history/history_006_neg.ksh index a2da831c5cce..e97adc4e3ce0 100755 --- a/tests/zfs-tests/tests/functional/history/history_006_neg.ksh +++ b/tests/zfs-tests/tests/functional/history/history_006_neg.ksh @@ -40,7 +40,7 @@ # STRATEGY: # 1. Create a test pool. # 2. Separately invoke zfs list|get|holds|mount|unmount|share|unshare|send -# 3. Verify they were not recored in pool history. +# 3. Verify they were not recorded in pool history. # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/history/history_007_pos.ksh b/tests/zfs-tests/tests/functional/history/history_007_pos.ksh index b65e855d8c70..d1c92c5e7c20 100755 --- a/tests/zfs-tests/tests/functional/history/history_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_007_pos.ksh @@ -83,7 +83,7 @@ for arch in "i386" "sparc"; do TZ=$TIMEZONE zpool history $migratedpoolname | grep -v "^$" \ >$migrated_cmds_f RET=$? - (( $RET != 0 )) && log_fail "zpool histroy $migratedpoolname fails." + (( $RET != 0 )) && log_fail "zpool history $migratedpoolname fails." # The migrated history file should differ with original history file on # two commands -- 'export' and 'import', which are included in migrated diff --git a/tests/zfs-tests/tests/functional/history/history_common.kshlib b/tests/zfs-tests/tests/functional/history/history_common.kshlib index 80af2e903daa..b82c60cbb693 100644 --- a/tests/zfs-tests/tests/functional/history/history_common.kshlib +++ b/tests/zfs-tests/tests/functional/history/history_common.kshlib @@ -110,7 +110,7 @@ function verify_long fi typeset suffix="" - if [ is_linux ]; then + if is_linux; then suffix=":linux" fi @@ -224,7 +224,7 @@ function verify_allow # # Here, we determine three things: - # - Whether we're operating on a set or an indivdual permission (which + # - Whether we're operating on a set or an individual permission (which # dictates the case of the first character in the code) # - The name of the dataset we're operating on. 
# - Whether the operation applies locally or to descendent datasets (or diff --git a/tests/zfs-tests/tests/functional/hkdf/Makefile.am b/tests/zfs-tests/tests/functional/hkdf/Makefile.am index d0a68f442fab..b54e353cd963 100644 --- a/tests/zfs-tests/tests/functional/hkdf/Makefile.am +++ b/tests/zfs-tests/tests/functional/hkdf/Makefile.am @@ -2,8 +2,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include AM_CPPFLAGS += -I$(top_srcdir)/lib/libspl/include -LDADD = $(top_srcdir)/lib/libicp/libicp.la -LDADD += $(top_srcdir)/lib/libzpool/libzpool.la +LDADD = $(top_builddir)/lib/libzpool/libzpool.la AUTOMAKE_OPTIONS = subdir-objects diff --git a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh index 76bd05ce57de..b1c24fa3a74f 100755 --- a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh @@ -401,18 +401,17 @@ set -A local_val "off" "on" "off" \ # # Add system specific values # - -if ! is_linux; then +if is_linux; then + prop+=("acltype" "") + def_val+=("off") + local_val+=("off") +else prop+=("aclmode" "" \ "mountpoint" "") def_val+=("discard" \ "") local_val+=("groupmask" \ "$TESTDIR") -else - prop+=("acltype" "") - def_val+=("off") - local_val+=("off") fi diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh index 63c68e66e4e4..aecdc5a3b078 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh @@ -80,7 +80,7 @@ dumpdev=`dumpadm | grep "Dump device" | awk '{print $3}'` [[ -z "$dumpdev" ]] && log_untested "No dump device has been configured" [[ "$dumpdev" != "$diskslice" ]] && \ - log_untested "Dump device has not been been configured to $diskslice" + log_untested "Dump device has not been configured to $diskslice" log_note "Attempt to zpool the dump device" unset NOINUSE_CHECK diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh index 95d505f35bf8..b126f66a0c3e 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh @@ -48,8 +48,8 @@ verify_runnable "global" function cleanup { # - # Essentailly this is the default_cleanup routine but I cannot get it - # to work correctly. So its reproduced below. Still need to full + # Essentially this is the default_cleanup routine but I cannot get it + # to work correctly. So its reproduced below. Still need to fully # understand why default_cleanup does not work correctly from here. # log_must zfs umount $TESTPOOL/$TESTFS diff --git a/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh b/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh index ddc8fa7a49c2..a08beb8b251f 100755 --- a/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh @@ -69,7 +69,7 @@ function verify_assertion #slices echo "y" | newfs -v $t > /dev/null 2>&1 (( $? !=0 )) && \ log_fail "newfs over exported pool " \ - "failes unexpected." + "fails unexpectedly." 
done return 0 @@ -107,7 +107,6 @@ while (( i < ${#vdevs[*]} )); do create_pool $TESTPOOL1 ${vdevs[i]} $vslices spare $sslices log_must zpool export $TESTPOOL1 verify_assertion "$rawtargets" - cleanup_devices $vslices $sslices (( i = i + 1 )) done diff --git a/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh b/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh index 3be20356ea0e..f59603724e76 100755 --- a/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/large_files/large_files_001_pos.ksh @@ -38,7 +38,7 @@ # STRATEGY: # 1. largest_file will write to a file and increase its size # to the maximum allowable. -# 2. The last byte of the file should be accessbile without error. +# 2. The last byte of the file should be accessible without error. # 3. Writing beyond the maximum file size generates an 'errno' of # EFBIG. # diff --git a/tests/zfs-tests/tests/functional/link_count/Makefile.am b/tests/zfs-tests/tests/functional/link_count/Makefile.am index 669f3c142c8c..bfb7154a6518 100644 --- a/tests/zfs-tests/tests/functional/link_count/Makefile.am +++ b/tests/zfs-tests/tests/functional/link_count/Makefile.am @@ -2,4 +2,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/link_count dist_pkgdata_SCRIPTS = \ cleanup.ksh \ setup.ksh \ - link_count_001.ksh + link_count_001.ksh \ + link_count_root_inode.ksh diff --git a/tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh b/tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh new file mode 100755 index 000000000000..d2bf30ac37c2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/link_count/link_count_root_inode.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify root inode (directory) has correct link count. +# +# STRATEGY: +# 1. Create pool and fs. +# 2. Test link count of root inode. +# 3. Create directories and test link count of root inode. +# 4. Delete directories and test link count of root inode. +# 5. Create regular file and test link count of root inode. +# 6. Delete regular file and test link count of root inode. +# + +function assert_link_count +{ + typeset dirpath="$1" + typeset value="$2" + + log_must test "$(ls -ld $dirpath | awk '{ print $2 }')" == "$value" +} + +verify_runnable "both" + +log_note "Verify root inode (directory) has correct link count." + +# Delete a directory from link_count_001.ksh. +if [ -d "${TESTDIR}" -a -d "${TESTDIR}/tmp" ]; then + log_must rm -rf ${TESTDIR}/tmp +fi + +# +# Test with hidden '.zfs' directory. +# This also tests general directories. 
+# +log_note "Testing with snapdir set to hidden (default)" + +for dst in $TESTPOOL $TESTPOOL/$TESTFS +do + typeset mtpt=$(get_prop mountpoint $dst) + log_must zfs set snapdir=hidden $dst + log_must test -d "$mtpt/.zfs" + if test -n "$(ls $mtpt)"; then + ls $mtpt + log_note "$mtpt not empty, skipping" + continue + fi + assert_link_count $mtpt 2 + + log_must mkdir $mtpt/a + assert_link_count $mtpt 3 + log_must rmdir $mtpt/a + assert_link_count $mtpt 2 + + log_must mkdir -p $mtpt/a/b + assert_link_count $mtpt 3 + log_must rmdir $mtpt/a/b + log_must rmdir $mtpt/a + assert_link_count $mtpt 2 + + log_must touch $mtpt/a + assert_link_count $mtpt 2 + log_must rm $mtpt/a + assert_link_count $mtpt 2 +done + +# +# Test with visible '.zfs' directory. +# +log_note "Testing with snapdir set to visible" + +for dst in $TESTPOOL $TESTPOOL/$TESTFS +do + typeset mtpt=$(get_prop mountpoint $dst) + log_must zfs set snapdir=visible $dst + log_must test -d "$mtpt/.zfs" + if test -n "$(ls $mtpt)"; then + ls $mtpt + log_note "$mtpt not empty, skipping" + continue + fi + assert_link_count $mtpt 3 + + log_must mkdir $mtpt/a + assert_link_count $mtpt 4 + log_must rmdir $mtpt/a + assert_link_count $mtpt 3 + + log_must mkdir -p $mtpt/a/b + assert_link_count $mtpt 4 + log_must rmdir $mtpt/a/b + log_must rmdir $mtpt/a + assert_link_count $mtpt 3 + + log_must touch $mtpt/a + assert_link_count $mtpt 3 + log_must rm $mtpt/a + assert_link_count $mtpt 3 +done + +log_pass "Verify root inode (directory) has correct link count passed" diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh index 24150b827f8f..2f4257993d4a 100755 --- a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh @@ -33,7 +33,7 @@ # # DESCRIPTION: -# Writing to a file and mmaping that file at the +# Writing to a file and mmapping that file at the # same time does not result in a deadlock. # # STRATEGY: diff --git a/tests/zfs-tests/tests/functional/mmp/Makefile.am b/tests/zfs-tests/tests/functional/mmp/Makefile.am index e39a0a5aac8e..2848fd4ce692 100644 --- a/tests/zfs-tests/tests/functional/mmp/Makefile.am +++ b/tests/zfs-tests/tests/functional/mmp/Makefile.am @@ -12,6 +12,7 @@ dist_pkgdata_SCRIPTS = \ mmp_reset_interval.ksh \ mmp_on_zdb.ksh \ mmp_write_distribution.ksh \ + mmp_hostid.ksh \ setup.ksh \ cleanup.ksh diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh new file mode 100755 index 000000000000..e3c6e34f4bc0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh @@ -0,0 +1,94 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Verify the hostid file can reside on a ZFS dataset. +# +# STRATEGY: +# 1. Create a non-redundant pool +# 2. Create an 'etc' dataset containing a valid hostid file +# 3. Create a file so the pool will have some contents +# 4. 
Verify multihost cannot be enabled until the /etc/hostid is linked +# 5. Verify vdevs may be attached and detached +# 6. Verify normal, cache, log and special vdevs can be added +# 7. Verify normal, cache, and log vdevs can be removed +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must rm $MMP_DIR/file.{0,1,2,3,4,5} + log_must rmdir $MMP_DIR + log_must mmp_clear_hostid + if [[ -L $HOSTID_FILE ]]; then + rm -f $HOSTID_FILE + fi +} + +log_assert "Verify hostid file can reside on a ZFS dataset" +log_onexit cleanup + +log_must mkdir -p $MMP_DIR +log_must truncate -s $MINVDEVSIZE $MMP_DIR/file.{0,1,2,3,4,5} + +# 1. Create a non-redundant pool +log_must zpool create $MMP_POOL $MMP_DIR/file.0 + +# 2. Create an 'etc' dataset containing a valid hostid file; caching is +# disabled on the dataset to force the hostid to be read from disk. +log_must zfs create -o primarycache=none -o secondarycache=none $MMP_POOL/etc +mntpnt_etc=$(get_prop mountpoint $MMP_POOL/etc) +log_must mmp_set_hostid $HOSTID1 +log_must mv $HOSTID_FILE $mntpnt_etc/hostid + +# 3. Create a file so the pool will have some contents +log_must zfs create $MMP_POOL/fs +mntpnt_fs=$(get_prop mountpoint $MMP_POOL/fs) +log_must mkfile 1M $mntpnt_fs/file + +# 4. Verify multihost cannot be enabled until the /etc/hostid is linked +log_mustnot zpool set multihost=on $MMP_POOL +log_mustnot ls -l $HOSTID_FILE +log_must ln -s $mntpnt_etc/hostid $HOSTID_FILE +log_must zpool set multihost=on $MMP_POOL + +# 5. Verify vdevs may be attached and detached +log_must zpool attach $MMP_POOL $MMP_DIR/file.0 $MMP_DIR/file.1 +log_must zpool detach $MMP_POOL $MMP_DIR/file.1 + +# 6. Verify normal, cache, log and special vdevs can be added +log_must zpool add $MMP_POOL $MMP_DIR/file.1 +log_must zpool add $MMP_POOL $MMP_DIR/file.2 +log_must zpool add $MMP_POOL cache $MMP_DIR/file.3 +log_must zpool add $MMP_POOL log $MMP_DIR/file.4 +log_must zpool add $MMP_POOL special $MMP_DIR/file.5 + +# 7. Verify normal, cache, and log vdevs can be removed +log_must zpool remove $MMP_POOL $MMP_DIR/file.2 +log_must zpool remove $MMP_POOL $MMP_DIR/file.3 +log_must zpool remove $MMP_POOL $MMP_DIR/file.4 + +log_pass "Verify hostid file can reside on a ZFS dataset." 
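The new mmp_hostid test above only symlinks /etc/hostid into the pool; the file itself is trivial. As a minimal sketch of what a valid hostid file contains on Linux, where glibc reads /etc/hostid as four native-endian bytes (the id 0x01234567 is illustrative, and the byte order shown assumes a little-endian machine):

    # Write hostid 0x01234567 as four little-endian bytes, then verify it.
    printf '\x67\x45\x23\x01' > /etc/hostid
    hostid    # expected to print 01234567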
diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh index bf1eb54a7389..9c4552b0cfb0 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh @@ -66,11 +66,11 @@ UBER_CHANGES=$(count_mmp_writes $TESTPOOL 10) log_note "Uberblock changed $UBER_CHANGES times" if [ $UBER_CHANGES -lt $MIN_UB_WRITES ]; then - log_fail "Fewer uberblock writes occured than expected ($EXPECTED)" + log_fail "Fewer uberblock writes occurred than expected ($EXPECTED)" fi if [ $UBER_CHANGES -gt $MAX_UB_WRITES ]; then - log_fail "More uberblock writes occured than expected ($EXPECTED)" + log_fail "More uberblock writes occurred than expected ($EXPECTED)" fi log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN diff --git a/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh b/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh index b3df69141fe7..b1eeaf2cc569 100755 --- a/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh +++ b/tests/zfs-tests/tests/functional/no_space/enospc_df.ksh @@ -58,7 +58,7 @@ log_must zfs umount $TESTPOOL/$TESTFS # Ensure the pool root filesystem shows in df output. # If the pool was full (available == 0) and the pool -# root filesytem had very little in it (used < 1 block), +# root filesystem had very little in it (used < 1 block), # the size reported to df was zero (issue #8253) and # df skipped the filesystem in its output. log_must eval "df -h | grep $TESTPOOL" diff --git a/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh b/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh index c9d7b59b344b..bd38883d7578 100755 --- a/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh +++ b/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh @@ -24,7 +24,7 @@ # # Strategy: # 1. Create an origin fs with compression and sha256. -# 2. Clone origin such that it inherits the properies. +# 2. Clone origin such that it inherits the properties. # 3. Use dd with the sync flag to test the sync write path. # diff --git a/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh b/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh index ad96d5dcb637..514a05984160 100755 --- a/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh +++ b/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_removal.ksh @@ -52,7 +52,7 @@ populate_test_pool # # Create big empty file and do some writes at random # offsets to ensure that it takes up space. Note that -# the implcitly created filesystem ($FS0) does not +# the implicitly created filesystem ($FS0) does not # have compression enabled. # log_must mkfile $BIGFILESIZE $FS0FILE diff --git a/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib b/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib index 6e410e0c85f8..ea6c03e9d59d 100644 --- a/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib +++ b/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib @@ -27,7 +27,7 @@ # This is why these tests run directly on pools that use a # "real disk vdev" (meaning not a file based one). These tests # use the $TESTPOOL pool that is created on top of $TESTDISK. 
-# This pool is refered to as the "test pool" and thus all +# This pool is referred to as the "test pool" and thus all # the tests of this group use the testpool-related functions of # this file (not the nested_pools ones). # diff --git a/tests/zfs-tests/tests/functional/procfs/pool_state.ksh b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh index a3afe0c429de..f4df839be637 100755 --- a/tests/zfs-tests/tests/functional/procfs/pool_state.ksh +++ b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh @@ -105,8 +105,10 @@ check_all $TESTPOOL "ONLINE" # Fault one of the disks, and check that pool is degraded DISK1=$(echo "$DISKS" | awk '{print $2}') -zpool offline -tf $TESTPOOL $DISK1 +log_must zpool offline -tf $TESTPOOL $DISK1 check_all $TESTPOOL "DEGRADED" +log_must zpool online $TESTPOOL $DISK1 +log_must zpool clear $TESTPOOL # Create a new pool out of a scsi_debug disk TESTPOOL2=testpool2 @@ -137,7 +139,7 @@ remove_disk $SDISK # background since the command will hang when the pool gets suspended. The # command will resume and exit after we restore the missing disk later on. zpool scrub $TESTPOOL2 & -sleep 1 # Give the scrub some time to run before we check if it fails +sleep 3 # Give the scrub some time to run before we check if it fails log_must check_all $TESTPOOL2 "SUSPENDED" diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh index c9eff3649ca4..88911aac6e8e 100755 --- a/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh +++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh @@ -48,7 +48,7 @@ function cleanup function count_snap_cmds { typeset expected_count=$1 - count=$(grep "command: zfs snapshot $FS@testsnapshot" | wc -l) + count=$(grep -E "command: (lt-)?zfs snapshot $FS@testsnapshot" | wc -l) log_must eval "[[ $count -eq $expected_count ]]" } diff --git a/tests/zfs-tests/tests/functional/procfs/setup.ksh b/tests/zfs-tests/tests/functional/procfs/setup.ksh index b3812dbdc640..3444cfcf2f43 100755 --- a/tests/zfs-tests/tests/functional/procfs/setup.ksh +++ b/tests/zfs-tests/tests/functional/procfs/setup.ksh @@ -31,4 +31,3 @@ if ! is_linux ; then fi default_mirror_setup $DISKS -log_pass diff --git a/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh index 44af9941b929..46e79062a0e2 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectid_001_pos.ksh @@ -38,8 +38,8 @@ # # # STRATEGY: -# 1. Create a regular file and a directroy. -# 2. Set project ID on both directroy and regular file. +# 1. Create a regular file and a directory. +# 2. Set project ID on both directory and regular file. # 3. New created subdir or regular file should inherit its parent's # project ID if its parent has project inherit flag. # 4. New created subdir should inherit its parent project's inherit flag. diff --git a/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh index 1a402e298b99..e382f464046b 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectid_002_pos.ksh @@ -41,7 +41,7 @@ # 1. Create three directories # 2. Set tdir1 and tdir3 project ID as PRJID1, # set tdir2 project ID as PRJID2. -# 3. Create regular file under tdir1. 
It inherits tdir1 proejct ID. +# 3. Create regular file under tdir1. It inherits tdir1 project ID. # 4. Hardlink from tdir1's child to tdir2 should be denied, # move tdir1's child to tdir2 will be object recreated. # 5. Hardlink from tdir1's child to tdir3 should succeed. diff --git a/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh b/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh index df0eda7d770a..a975d2a19f0c 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh @@ -62,7 +62,7 @@ for prj in "${no_prjs[@]}"; do log_mustnot zfs set projectquota@$prj=100m $QFS done -log_note "can set all numberic id even that id is not existed" +log_note "can set all numeric id even if that id does not exist" log_must zfs set projectquota@12345678=100m $QFS set -A sizes "100mfsd" "m0.12m" "GGM" "-1234-m" "123m-m" diff --git a/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh index 494d7f3b7ac0..ec299e0e7f93 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh @@ -38,7 +38,7 @@ # # STRATEGY: # 1. set project [obj]quota on the directory -# 2. set project ID and inherit flag on the directoty +# 2. set project ID and inherit flag on the directory # 3. run 'df [-i]' on the directory and check the result # diff --git a/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh index 4008811a19e1..d61019242703 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projecttree_002_pos.ksh @@ -39,7 +39,7 @@ # # STRATEGY: # 1. Create a tree with 4 level directories. -# 2. Set project ID on both directroy and regular file via +# 2. Set project ID on both directory and regular file via # "zfs project -p". # 3. Check the project ID via "zfs project". # 4. Set project inherit flag on kinds of level directories (and its diff --git a/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh b/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh index 33382fdbe92d..cbc45857f779 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projecttree_003_neg.ksh @@ -43,8 +43,8 @@ # 2. "-C" only supports "-r" and "-k". # 3. "-s" only supports "-r" and "-p". # 4. "-c", "-C" and "-s" can NOT be specified together. -# 5. "-d" can overwirte former "-r". -# 6. "-r" can overwirte former "-d". +# 5. "-d" can overwrite former "-r". +# 6. "-r" can overwrite former "-d". # 7. "-0" must be together with "-c". # 8. "-d" must be on directory. # 9. "-r" must be on directory. diff --git a/tests/zfs-tests/tests/functional/pyzfs/Makefile.am b/tests/zfs-tests/tests/functional/pyzfs/Makefile.am index 0a27adeccaf4..4d99285e49ca 100644 --- a/tests/zfs-tests/tests/functional/pyzfs/Makefile.am +++ b/tests/zfs-tests/tests/functional/pyzfs/Makefile.am @@ -7,7 +7,7 @@ EXTRA_DIST = \ # # The pyzfs module is built either for Python 2 or Python 3. In order -# to properly test it the unit tests must be updated to the matching vesion. +# to properly test it the unit tests must be updated to the matching version. 
# $(pkgpyzfs_SCRIPTS):%:%.in -$(SED) -e 's,@PYTHON\@,$(PYTHON),g' \ diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh index 4c105b9411c1..0f88a1a51468 100755 --- a/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh @@ -35,4 +35,4 @@ log_mustnot raidz_test -T -log_pass "raidz_test detects errors as espected." +log_pass "raidz_test detects errors as expected." diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh b/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh index e25a48be8df3..b5557f1f7e44 100755 --- a/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh @@ -41,7 +41,7 @@ # 3. Fill the filesystem with directories and files. # 4. Record all the files and directories checksum information. # 5. Damaged one of the virtual disk file. -# 6. Verify the data is correct to prove raidz can withstand 1 devicd is +# 6. Verify the data is correct to prove raidz can withstand 1 device is # failing. # diff --git a/tests/zfs-tests/tests/functional/refquota/Makefile.am b/tests/zfs-tests/tests/functional/refquota/Makefile.am index 5f7c7b68690f..1d8418fbbec5 100644 --- a/tests/zfs-tests/tests/functional/refquota/Makefile.am +++ b/tests/zfs-tests/tests/functional/refquota/Makefile.am @@ -7,4 +7,6 @@ dist_pkgdata_SCRIPTS = \ refquota_003_pos.ksh \ refquota_004_pos.ksh \ refquota_005_pos.ksh \ - refquota_006_neg.ksh + refquota_006_neg.ksh \ + refquota_007_neg.ksh \ + refquota_008_neg.ksh diff --git a/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh b/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh new file mode 100755 index 000000000000..4f0393883b6a --- /dev/null +++ b/tests/zfs-tests/tests/functional/refquota/refquota_007_neg.ksh @@ -0,0 +1,61 @@ +#!/bin/ksh +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. + +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# refquota limits the amount of space a dataset can consume, +# snapshot rollback should be limited by refquota. +# +# STRATEGY: +# 1. Create a file in a filesystem +# 2. Create a snapshot of the filesystem +# 3. Remove the file +# 4. Set a refquota of size half of the file +# 5. Rollback the filesystem from the snapshot +# 6. Rollback should fail +# + +verify_runnable "both" + +function cleanup +{ + log_must zfs destroy -rf $TESTPOOL/$TESTFS + log_must zfs create $TESTPOOL/$TESTFS + log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS +} + +log_onexit cleanup + +TESTFILE='testfile' +FS=$TESTPOOL/$TESTFS + +mntpnt=$(get_prop mountpoint $FS) +log_must mkfile 20M $mntpnt/$TESTFILE +log_must zfs snapshot $FS@snap20M +log_must rm $mntpnt/$TESTFILE + +log_must sync + +log_must zfs set refquota=10M $FS +log_mustnot zfs rollback $FS@snap20M + +log_pass "The rollback to the snapshot was restricted by refquota." 
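Both new refquota tests (the rollback case above and the receive case that follows) rely on the same semantics: refquota caps only the space a dataset itself references, so data reintroduced by a rollback or a receive cannot fit under a smaller limit. A minimal sketch of the basic behavior, with illustrative pool and dataset names:

    # Writes stop once the dataset references ~10M, even though the pool
    # has room; snapshot space is not charged against refquota.
    zfs create -o refquota=10M tank/fs
    dd if=/dev/zero of=/tank/fs/f bs=1M count=20    # expected to fail with EDQUOT
    zfs get -Hpo value referenced,refquota tank/fs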
diff --git a/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh b/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh new file mode 100755 index 000000000000..e7f40ec71767 --- /dev/null +++ b/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. + +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# refquota limits the amount of space a dataset can consume, +# This test verifies that zfs receive does not override +# refquota. +# +# STRATEGY: +# 1. Create a sub-filesystem $TESTSUBFS1 +# 2. Create a file in the sub-filesystem $TESTSUBFS1 +# 3. Create a snapshot of the sub-filesystem $TESTSUBFS1 +# 4. Create another sub-filesystem $TESTSUBFS2 +# 5. Apply a refquota value to $TESTSUBFS2, +# half the sub-filesystem $TESTSUBFS1 file size +# 6. Verify that zfs receive of the snapshot of $TESTSUBFS1 +# fails due to refquota +# + +verify_runnable "both" + +oldvalue=$(get_tunable spa_asize_inflation) +function cleanup +{ + set_tunable32 spa_asize_inflation $oldvalue + log_must zfs destroy -rf $TESTPOOL/$TESTFS + log_must zfs create $TESTPOOL/$TESTFS + log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS +} + +log_onexit cleanup + +set_tunable32 spa_asize_inflation 2 + +TESTFILE='testfile' +FS=$TESTPOOL/$TESTFS +log_must zfs create $FS/$TESTSUBFS1 +log_must zfs create $FS/$TESTSUBFS2 + +mntpnt1=$(get_prop mountpoint $FS/$TESTSUBFS1) +mntpnt2=$(get_prop mountpoint $FS/$TESTSUBFS2) + +log_must mkfile 200M $mntpnt1/$TESTFILE +log_must zfs snapshot $FS/$TESTSUBFS1@snap200m + +log_must zfs set refquota=10M $FS/$TESTSUBFS2 +log_mustnot eval "zfs send $FS/$TESTSUBFS1@snap200m |" \ + "zfs receive -F $FS/$TESTSUBFS2" + +log_pass "ZFS receive does not override refquota" + diff --git a/tests/zfs-tests/tests/functional/refreserv/Makefile.am b/tests/zfs-tests/tests/functional/refreserv/Makefile.am index 96f25d444e67..bd760a1f0697 100644 --- a/tests/zfs-tests/tests/functional/refreserv/Makefile.am +++ b/tests/zfs-tests/tests/functional/refreserv/Makefile.am @@ -6,7 +6,9 @@ dist_pkgdata_SCRIPTS = \ refreserv_002_pos.ksh \ refreserv_003_pos.ksh \ refreserv_004_pos.ksh \ - refreserv_005_pos.ksh + refreserv_005_pos.ksh \ + refreserv_multi_raidz.ksh \ + refreserv_raidz.ksh dist_pkgdata_DATA = \ refreserv.cfg diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh index da36609f2c41..3e5a78cf944f 100755 --- a/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_003_pos.ksh @@ -38,7 +38,7 @@ # space outside of this refreservation. # # STRATEGY: -# 1. Setting quota and refservation +# 1. Setting quota and refreservation # 2. Verify snapshot can be created, when used =< quota - refreserv # 3. 
Verify failed to create snapshot, when used > quota - refreserv # diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh new file mode 100755 index 000000000000..c904a807f17c --- /dev/null +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh @@ -0,0 +1,197 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/refreserv/refreserv.cfg + +# +# DESCRIPTION: +# raidz refreservation=auto picks worst raidz vdev +# +# STRATEGY: +# 1. Create a pool with a single raidz vdev +# 2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k] +# - create a volume +# - remember its refreservation +# - destroy the volume +# 3. Destroy the pool +# 4. Recreate the pool with one more disk in the vdev, then repeat steps +# 2 and 3. +# +# NOTES: +# 1. This test will use up to 14 disks but can cover the key concepts with +# 5 disks. +# 2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely. +# + +verify_runnable "global" + +typeset -a alldisks=($DISKS) + +# The larger the volsize, the better zvol_volsize_to_reservation() is at +# guessing the right number - though it is horrible with tiny blocks. At 10M on +# ashift=12, the estimate may be over 26% too high. +volsize=100 + +function cleanup +{ + default_cleanup_noexit + default_setup_noexit "${alldisks[0]}" +} + +log_assert "raidz refreservation=auto picks worst raidz vdev" +log_onexit cleanup + +poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + +# Testing tiny block sizes on ashift=12 pools causes so much size inflation +# that small test disks may fill before creating small volumes. However, +# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for +# testing the problems that arise from 4K and 8K blocks on ashift=12 pools. +bps=$(lsblk -nrdo min-io /dev/${alldisks[0]}) +case "$bps" in +512) + allshifts=(9 10 17) + ;; +4096) + allshifts=(12 13 17) + ;; +*) + log_fail "bytes/sector: $bps != (512|4096)" + ;; +esac +log_note "Testing in ashift=${allshifts[0]} mode" + +typeset -A sizes= + +# +# Determine the refreservation for a $volsize MiB volume on each raidz type at +# various block sizes. 
+# +for parity in 1 2 3; do + raid=raidz$parity + typeset -A sizes["$raid"] + + # Ensure we hit scenarios with and without skip blocks + for ndisks in $((parity * 2)) $((parity * 2 + 1)); do + typeset -a disks=(${alldisks[0..$((ndisks - 1))]}) + + if (( ${#disks[@]} < ndisks )); then + log_note "Too few disks to test $raid-$ndisks" + continue + fi + + typeset -A sizes["$raid"]["$ndisks"] + + log_must zpool create "$TESTPOOL" "$raid" "${disks[@]}" + + for bits in "${allshifts[@]}"; do + vbs=$((1 << bits)) + log_note "Gathering refreservation for $raid-$ndisks" \ + "volblocksize=$vbs" + + vol=$TESTPOOL/$TESTVOL + log_must zfs create -V ${volsize}m \ + -o volblocksize=$vbs "$vol" + + refres=$(zfs get -Hpo value refreservation "$vol") + log_must test -n "$refres" + sizes["$raid"]["$ndisks"]["$vbs"]=$refres + + log_must_busy zfs destroy "$vol" + done + + log_must_busy zpool destroy "$TESTPOOL" + done +done + +# A little extra info is always helpful when diagnosing problems. To +# pretty-print what you find in the log, do this in ksh: +# typeset -A sizes=(...) +# print -v sizes +log_note "sizes=$(print -C sizes)" + +# +# Helper function for checking that refreservation is calculated properly in +# multi-vdev pools. "Properly" is defined as assuming that all vdevs are as +# space inefficient as the worst one. +# +function check_vdevs { + typeset raid=$1 + typeset nd1=$2 + typeset nd2=$3 + typeset -a disks1 disks2 + typeset vbs vol refres refres1 refres2 expect + + disks1=(${alldisks[0..$((nd1 - 1))]}) + disks2=(${alldisks[$nd1..$((nd1 + nd2 - 1))]}) + if (( ${#disks2[@]} < nd2 )); then + log_note "Too few disks to test $raid-$nd1 + $raid-$nd2" + return + fi + + log_must zpool create -f "$TESTPOOL" \ + "$raid" "${disks1[@]}" "$raid" "${disks2[@]}" + + for bits in "${allshifts[@]}"; do + vbs=$((1 << bits)) + log_note "Verifying $raid-$nd1 $raid-$nd2 volblocksize=$vbs" + + vol=$TESTPOOL/$TESTVOL + log_must zfs create -V ${volsize}m -o volblocksize=$vbs "$vol" + refres=$(zfs get -Hpo value refreservation "$vol") + log_must test -n "$refres" + + refres1=${sizes["$raid"]["$nd1"]["$vbs"]} + refres2=${sizes["$raid"]["$nd2"]["$vbs"]} + + if (( refres1 > refres2 )); then + log_note "Expecting refres ($refres) to match refres" \ + "from $raid-$nd1 ($refres1)" + log_must test "$refres" -eq "$refres1" + else + log_note "Expecting refres ($refres) to match refres" \ + "from $raid-$nd2 ($refres2)" + log_must test "$refres" -eq "$refres2" + fi + + log_must zfs destroy "$vol" + done + + log_must zpool destroy "$TESTPOOL" +} + +# +# Verify that multi-vdev pools use the least optimistic size for all the +# permutations within a particular raidz variant. +# +for raid in "${!sizes[@]}"; do + # ksh likes to create a [0] item for us. Thanks, ksh! + [[ $raid == "0" ]] && continue + + for nd1 in "${!sizes["$raid"][@]}"; do + # And with an empty array we get one key, ''. Thanks, ksh!
+ [[ $nd1 == "0" || -z "$nd1" ]] && continue + + for nd2 in "${!sizes["$raid"][@]}"; do + [[ $nd2 == "0" || -z "$nd2" ]] && continue + + check_vdevs "$raid" "$nd1" "$nd2" + done + done +done + +log_pass "raidz refreservation=auto picks worst raidz vdev" diff --git a/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh b/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh new file mode 100755 index 000000000000..9f25242de607 --- /dev/null +++ b/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh @@ -0,0 +1,131 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/refreserv/refreserv.cfg + +# +# DESCRIPTION: +# raidz refreservation=auto accounts for extra parity and skip blocks +# +# STRATEGY: +# 1. Create a pool with a single raidz vdev +# 2. For each block size [512b, 1k, 128k] or [4k, 8k, 128k] +# - create a volume +# - fully overwrite it +# - verify that referenced is less than or equal to reservation +# - destroy the volume +# 3. Destroy the pool +# 4. Recreate the pool with one more disk in the vdev, then repeat steps +# 2 and 3. +# 5. Repeat all steps above for raidz2 and raidz3. +# +# NOTES: +# 1. This test will use up to 14 disks but can cover the key concepts with +# 5 disks. +# 2. If the disks are a mixture of 4Kn and 512n/512e, failures are likely. +# + +verify_runnable "global" + +typeset -a alldisks=($DISKS) + +# The larger the volsize, the better zvol_volsize_to_reservation() is at +# guessing the right number. At 10M on ashift=12, the estimate may be over 26% +# too high. +volsize=100 + +function cleanup +{ + default_cleanup_noexit + default_setup_noexit "${alldisks[0]}" +} + +log_assert "raidz refreservation=auto accounts for extra parity and skip blocks" +log_onexit cleanup + +poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + +# Testing tiny block sizes on ashift=12 pools causes so much size inflation +# that small test disks may fill before creating small volumes. However, +# testing 512b and 1K blocks on ashift=9 pools is an ok approximation for +# testing the problems that arise from 4K and 8K blocks on ashift=12 pools. 
+bps=$(lsblk -nrdo min-io /dev/${alldisks[0]})
+log_must test "$bps" -eq 512 -o "$bps" -eq 4096
+case "$bps" in
+512)
+	allshifts=(9 10 17)
+	maxpct=151
+	;;
+4096)
+	allshifts=(12 13 17)
+	maxpct=110
+	;;
+*)
+	log_fail "bytes/sector: $bps != (512|4096)"
+	;;
+esac
+log_note "Testing in ashift=${allshifts[0]} mode"
+
+# This loop handles all iterations of steps 1 through 4 described in the
+# strategy comment above.
+for parity in 1 2 3; do
+	raid=raidz$parity
+
+	# Ensure we hit scenarios with and without skip blocks
+	for ndisks in $((parity * 2)) $((parity * 2 + 1)); do
+		typeset -a disks=(${alldisks[0..$((ndisks - 1))]})
+
+		if (( ${#disks[@]} < ndisks )); then
+			log_note "Too few disks to test $raid-$ndisks"
+			continue
+		fi
+
+		log_must zpool create "$TESTPOOL" "$raid" "${disks[@]}"
+
+		for bits in "${allshifts[@]}"; do
+			vbs=$((1 << bits))
+			log_note "Testing $raid-$ndisks volblocksize=$vbs"
+
+			vol=$TESTPOOL/$TESTVOL
+			log_must zfs create -V ${volsize}m \
+			    -o volblocksize=$vbs "$vol"
+			block_device_wait "/dev/zvol/$vol"
+			log_must dd if=/dev/zero of=/dev/zvol/$vol \
+			    bs=1024k count=$volsize
+			sync
+
+			ref=$(zfs get -Hpo value referenced "$vol")
+			refres=$(zfs get -Hpo value refreservation "$vol")
+			log_must test -n "$ref"
+			log_must test -n "$refres"
+
+			typeset -F2 deltapct=$((refres * 100.0 / ref))
+			log_note "$raid-$ndisks refreservation $refres" \
+			    "is $deltapct% of referenced $ref"
+
+			log_must test "$ref" -le "$refres"
+			log_must test "$deltapct" -le $maxpct
+
+			log_must_busy zfs destroy "$vol"
+			block_device_wait
+		done
+
+		log_must_busy zpool destroy "$TESTPOOL"
+	done
+done
+
+log_pass "raidz refreservation=auto accounts for extra parity and skip blocks"
diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am
index c5d013e7c86a..1551a92e52c0 100644
--- a/tests/zfs-tests/tests/functional/removal/Makefile.am
+++ b/tests/zfs-tests/tests/functional/removal/Makefile.am
@@ -10,15 +10,16 @@
 #
 #
-# Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+# Copyright (c) 2014, 2019 by Delphix. All rights reserved.
 #
 
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal
 
 dist_pkgdata_SCRIPTS = \
-	cleanup.ksh removal_all_vdev.ksh removal_check_space.ksh \
-	removal_condense_export.ksh removal_multiple_indirection.ksh \
-	removal_remap_deadlists.ksh removal_remap.ksh \
+	cleanup.ksh removal_all_vdev.ksh removal_cancel.ksh \
+	removal_check_space.ksh removal_condense_export.ksh \
+	removal_multiple_indirection.ksh \
+	removal_remap_deadlists.ksh removal_nopwrite.ksh removal_remap.ksh \
 	removal_reservation.ksh removal_resume_export.ksh \
 	removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \
 	removal_with_dedup.ksh removal_with_errors.ksh \
@@ -28,6 +29,9 @@ dist_pkgdata_SCRIPTS = \
 	removal_with_send.ksh removal_with_send_recv.ksh \
 	removal_with_snapshot.ksh removal_with_write.ksh \
 	removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
-	remove_raidz.ksh remove_expanded.ksh removal.kshlib
+	remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh
+
+dist_pkgdata_DATA = \
+	removal.kshlib
 
 pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal
diff --git a/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh b/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh
new file mode 100755
index 000000000000..afb318ef2168
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh
@@ -0,0 +1,99 @@
+#!
/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#

+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+#
+# DESCRIPTION:
+#
+# Ensure that cancelling a removal midway does not cause any
+# issues, such as a panic.
+#
+# STRATEGY:
+#
+# 1. Create a pool with one vdev and do some writes on it.
+# 2. Add a new vdev to the pool and start the removal of
+#    the first vdev.
+# 3. Cancel the removal after some segments have been copied
+#    over to the new vdev.
+# 4. Run zdb to ensure the on-disk state of the pool is ok.
+#
+
+function cleanup
+{
+	#
+	# Reset tunable.
+	#
+	log_must set_tunable32 zfs_removal_suspend_progress 0
+}
+log_onexit cleanup
+
+SAMPLEFILE=/$TESTDIR/00
+
+#
+# Create pool with one disk.
+#
+log_must default_setup_noexit "$REMOVEDISK"
+
+#
+# Create a file of size 1GB and then do some random writes.
+# Since randwritecomp does 8K writes we do 25000 writes
+# which means we write ~200MB to the vdev.
+#
+log_must mkfile -n 1g $SAMPLEFILE
+log_must randwritecomp $SAMPLEFILE 25000
+
+#
+# Add second device where all the data will be evacuated.
+#
+log_must zpool add -f $TESTPOOL $NOTREMOVEDISK
+
+#
+# Start removal.
+#
+log_must zpool remove $TESTPOOL $REMOVEDISK
+
+#
+# Sleep a bit and hopefully allow removal to copy some data.
+#
+log_must sleep 1
+
+#
+# Block removal.
+#
+log_must set_tunable32 zfs_removal_suspend_progress 1
+
+#
+# Only for debugging purposes in test logs.
+#
+log_must zpool status $TESTPOOL
+
+#
+# Cancel removal.
+#
+log_must zpool remove -s $TESTPOOL
+
+#
+# Verify on-disk state.
+#
+log_must zdb $TESTPOOL
+
+log_pass "Device removal thread cancelled successfully."
diff --git a/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh
new file mode 100755
index 000000000000..e5d8261e80b6
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh
@@ -0,0 +1,87 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+. $STF_SUITE/tests/functional/nopwrite/nopwrite.shlib
+
+default_setup_noexit "$DISKS"
+log_onexit default_cleanup_noexit
+BLOCKSIZE=8192
+
+origin="$TESTPOOL/$TESTFS"
+
+log_must zfs set compress=on $origin
+log_must zfs set checksum=edonr $origin
+
+log_must zfs set recordsize=8k $origin
+dd if=/dev/urandom of=$TESTDIR/file_8k bs=1024k count=$MEGS oflag=sync \
+    conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file_8k failed."
+log_must zfs set recordsize=128k $origin
+dd if=/dev/urandom of=$TESTDIR/file_128k bs=1024k count=$MEGS oflag=sync \
+    conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file_128k failed."
+
+zfs snapshot $origin@a || log_fail "zfs snap failed"
+log_must zfs clone $origin@a $origin/clone
+
+#
+# Verify that nopwrites work prior to removal
+#
+log_must zfs set recordsize=8k $origin/clone
+dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \
+    oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
+log_must verify_nopwrite $origin $origin@a $origin/clone
+
+log_must zfs set recordsize=128k $origin/clone
+dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \
+    oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
+log_must verify_nopwrite $origin $origin@a $origin/clone
+
+#
+# Remove a device before testing nopwrites again
+#
+log_must zpool remove $TESTPOOL $REMOVEDISK
+log_must wait_for_removal $TESTPOOL
+log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK
+
+#
+# Normally, we expect nopwrites to avoid allocating new blocks, but
+# after a device has been removed the DVAs will get remapped when
+# an L0's indirect block is written. This will negate the effects
+# of nopwrite and should result in new allocations.
+#
+
+#
+# Perform a direct zil nopwrite test
+#
+log_must zfs set recordsize=8k $origin/clone
+dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \
+    oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
+log_mustnot verify_nopwrite $origin $origin@a $origin/clone
+
+#
+# Perform an indirect zil nopwrite test
+#
+log_must zfs set recordsize=128k $origin/clone
+dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \
+    oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
+log_mustnot verify_nopwrite $origin $origin@a $origin/clone
+
+log_pass "Remove works with nopwrite."
diff --git a/tests/zfs-tests/tests/functional/removal/remove_indirect.ksh b/tests/zfs-tests/tests/functional/removal/remove_indirect.ksh
new file mode 100755
index 000000000000..c4ba0d9ac564
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/removal/remove_indirect.ksh
@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2019, loli10K . All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+#
+# DESCRIPTION:
+# Device removal cannot remove non-concrete vdevs
+#
+# STRATEGY:
+# 1. Create a pool with removable devices
+# 2. Remove a top-level device
+# 3. Verify we can't remove the "indirect" vdev created by the first removal
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL
+	log_must rm -f $TEST_BASE_DIR/device-{1,2,3}
+}
+
+log_assert "Device removal should not be able to remove non-concrete vdevs"
+log_onexit cleanup
+
+# 1. Create a pool with removable devices
+truncate -s $MINVDEVSIZE $TEST_BASE_DIR/device-{1,2,3}
+zpool create $TESTPOOL $TEST_BASE_DIR/device-{1,2,3}
+
+# 2.
Remove a top-level device +log_must zpool remove $TESTPOOL $TEST_BASE_DIR/device-1 +log_must wait_for_removal $TESTPOOL + +# 3. Verify we can't remove the "indirect" vdev created by the first removal +INDIRECT_VDEV=$(zpool list -v -g $TESTPOOL | awk '{if ($2 == "-") { print $1; exit} }') +log_must test -n "$INDIRECT_VDEV" +log_mustnot zpool remove $TESTPOOL $INDIRECT_VDEV + +log_pass "Device removal cannot remove non-concrete vdevs" diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh b/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh index b72b8e4a388e..b8220791f1d4 100755 --- a/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/reservation/reservation_001_pos.ksh @@ -115,7 +115,7 @@ for obj in $TESTPOOL/$TESTFS $OBJ_LIST; do # # Due to the way space is consumed and released by metadata we - # can't do an exact check here, but we do do a basic sanity + # can't do an exact check here, but we do a basic sanity # check. # log_must within_limits $space_avail $new_space_avail $RESV_TOLERANCE diff --git a/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh b/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh index fbf4276e8bda..a0cd039b1839 100755 --- a/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh @@ -85,7 +85,7 @@ resv_size_set=`expr $resv_space_avail / $num_resv_fs` # # We set the reservations now, rather than when we created the filesystems -# to allow us to take into account space used by the filsystem metadata +# to allow us to take into account space used by the filesystem metadata # # Note we don't set a reservation on the first filesystem we created, # hence num=1 rather than zero below. diff --git a/tests/zfs-tests/tests/functional/resilver/Makefile.am b/tests/zfs-tests/tests/functional/resilver/Makefile.am new file mode 100644 index 000000000000..465d8f3a3a31 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/resilver +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + resilver_restart_001.ksh + +dist_pkgdata_DATA = \ + resilver.cfg diff --git a/tests/zfs-tests/tests/functional/resilver/cleanup.ksh b/tests/zfs-tests/tests/functional/resilver/cleanup.ksh new file mode 100755 index 000000000000..4dfa81424513 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/cleanup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/resilver/resilver.cfg
+
+verify_runnable "global"
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/resilver/resilver.cfg b/tests/zfs-tests/tests/functional/resilver/resilver.cfg
new file mode 100644
index 000000000000..88dfd24aed20
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/resilver/resilver.cfg
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END

+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4}
+SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1
+
+VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 ))
diff --git a/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
new file mode 100755
index 000000000000..876b28690c1c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
@@ -0,0 +1,191 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/resilver/resilver.cfg
+
+#
+# DESCRIPTION:
+# Testing resilver restart logic both with and without the deferred resilver
+# feature enabled, verifying that resilver is not restarted when it is
+# unnecessary.
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Create four filesystems with the primary cache disabled to force reads
+# 3. Write four files simultaneously, one to each filesystem
+# 4. Do with and without deferred resilvers enabled
+#    a. Replace a vdev with a spare & suspend resilver immediately
+#    b. Verify resilver starts properly
+#    c. Offline / online another vdev to introduce a new DTL range
+#    d. Verify resilver restarts or defers
+#    e. Inject read errors on vdev that was offlined / onlined
+#    f. Verify that resilver did not restart
+#    g. Unsuspend resilver and wait for it to finish
+#    h.
Verify that there are two resilvers and nothing is deferred
+#
+
+function cleanup
+{
+	log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME
+	log_must set_tunable32 zfs_scan_suspend_progress \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	log_must set_tunable32 zfs_zevent_len_max $ORIG_ZFS_ZEVENT_LEN_MAX
+	log_must zinject -c all
+	destroy_pool $TESTPOOL
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+}
+
+# count resilver events in zpool and number of deferred resilvers on vdevs
+function verify_restarts # <msg> <cnt> <defer>
+{
+	msg=$1
+	cnt=$2
+	defer=$3
+
+	# check the number of resilver start in events log
+	RESILVERS=$(zpool events | grep -c sysevent.fs.zfs.resilver_start)
+	log_note "expected $cnt resilver start(s)$msg, found $RESILVERS"
+	[[ "$RESILVERS" -ne "$cnt" ]] &&
+	    log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS"
+
+	[[ -z "$defer" ]] && return
+
+	# use zdb to find which vdevs have the resilver defer flag
+	VDEV_DEFERS=$(zdb -C $TESTPOOL | \
+	    sed -n -e '/^ *children\[[0-9]\].*$/{h}' \
+	    -e '/ *com.datto:resilver_defer$/{g;p}')
+
+	if [[ "$defer" == "-" ]]
+	then
+		[[ -n $VDEV_DEFERS ]] &&
+		    log_fail "didn't expect any vdevs to have resilver deferred"
+		return
+	fi
+
+	[[ "x${VDEV_DEFERS}x" =~ "x +children[$defer]:x" ]] ||
+	    log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS"
+}
+
+log_assert "Check for unnecessary resilver restarts"
+
+ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms)
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress)
+ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable zfs_zevent_len_max)
+
+set -A RESTARTS -- '1' '2' '2' '2'
+set -A VDEVS -- '' '' '' ''
+set -A DEFER_RESTARTS -- '1' '1' '1' '2'
+set -A DEFER_VDEVS -- '-' '2' '2' '-'
+
+VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
+
+log_onexit cleanup
+
+# ensure that enough events will be saved
+log_must set_tunable32 zfs_zevent_len_max 512
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
+    raidz ${VDEV_FILES[@]}
+
+# create 4 filesystems
+for fs in fs{0..3}
+do
+	log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
+done
+
+# simultaneously write 16M to each of them
+set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0
+log_note "Writing data files"
+for path in ${DATAPATHS[@]}
+do
+	dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
+done
+wait

+# test without and with the deferred resilver feature enabled
+for test in "without" "with"
+do
+	log_note "Testing $test deferred resilvers"
+
+	if [[ $test == "with" ]]
+	then
+		log_must zpool set feature@resilver_defer=enabled $TESTPOOL
+		RESTARTS=( "${DEFER_RESTARTS[@]}" )
+		VDEVS=( "${DEFER_VDEVS[@]}" )
+		VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
+	fi
+
+	# clear the events
+	log_must zpool events -c
+
+	# limit scanning time
+	log_must set_tunable32 zfs_resilver_min_time_ms 50
+
+	# initiate a resilver and suspend the scan as soon as possible
+	log_must zpool replace $TESTPOOL $VDEV_REPLACE
+	log_must set_tunable32 zfs_scan_suspend_progress 1
+
+	# there should only be 1 resilver start
+	verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"
+
+	# offline then online a vdev to introduce a new DTL range after current
+	# scan, which should restart (or defer) the resilver
+	log_must zpool offline $TESTPOOL ${VDEV_FILES[2]}
+	log_must zpool sync $TESTPOOL
+	log_must zpool online $TESTPOOL ${VDEV_FILES[2]}
+	log_must zpool sync $TESTPOOL
+
+	# there should now be 2 resilver starts w/o defer, 1 with defer
+	
verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}" + + # inject read io errors on vdev and verify resilver does not restart + log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL + log_must cat ${DATAPATHS[1]} > /dev/null + log_must zinject -c all + + # there should still be 2 resilver starts w/o defer, 1 with defer + verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}" + + # unsuspend resilver + log_must set_tunable32 zfs_scan_suspend_progress 0 + log_must set_tunable32 zfs_resilver_min_time_ms 3000 + + # wait for resilver to finish + for iter in {0..59} + do + is_pool_resilvered $TESTPOOL && break + sleep 1 + done + is_pool_resilvered $TESTPOOL || + log_fail "resilver timed out" + + # wait for a few txg's to see if a resilver happens + log_must zpool sync $TESTPOOL + log_must zpool sync $TESTPOOL + + # there should now be 2 resilver starts + verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}" +done + +log_pass "Resilver did not restart unnecessarily" diff --git a/tests/zfs-tests/tests/functional/resilver/setup.ksh b/tests/zfs-tests/tests/functional/resilver/setup.ksh new file mode 100755 index 000000000000..4dfa81424513 --- /dev/null +++ b/tests/zfs-tests/tests/functional/resilver/setup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +verify_runnable "global" + +log_pass diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 521a1c7eb63c..7f88f55a0e86 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -158,14 +158,9 @@ function cmp_md5s { typeset file1=$1 typeset file2=$2 - eval md5sum $file1 | awk '{ print $1 }' > $BACKDIR/md5_file1 - eval md5sum $file2 | awk '{ print $1 }' > $BACKDIR/md5_file2 - diff $BACKDIR/md5_file1 $BACKDIR/md5_file2 - typeset -i ret=$? 
-
-	rm -f $BACKDIR/md5_file1 $BACKDIR/md5_file2
-
-	return $ret
+	typeset sum1=$(md5digest $file1)
+	typeset sum2=$(md5digest $file2)
+	test "$sum1" = "$sum2"
 }
 
 #
@@ -343,7 +338,7 @@ function getds_with_suffix
 }
 
 #
-# Output inherited properties whitch is edited for file system
+# Output inherited properties which are edited for the file system
 #
 function fs_inherit_prop
 {
@@ -754,7 +749,7 @@ function verify_stream_size
 	datasetexists $ds || log_fail "No such dataset: $ds"
 
 	typeset stream_size=$(cat $stream | zstreamdump | sed -n \
-	    's/	Total write size = \(.*\) (0x.*)/\1/p')
+	    's/	Total payload size = \(.*\) (0x.*)/\1/p')
 	typeset inc_size=0
 	if [[ -n $inc_src ]]; then
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh
index 5e657a898f4c..531478760457 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh
@@ -38,7 +38,7 @@
 # STRATEGY:
 # 1. Separately promote pool clone, filesystem clone and volume clone.
 # 2. Recursively backup all the POOL and restore in POOL2
-# 3. Verify all the datesets and property be properly received.
+# 3. Verify all the datasets and properties were properly received.
 #
 
 verify_runnable "both"
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh
index 9ecd18d87da6..68f0e13927dc 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_011_pos.ksh
@@ -63,7 +63,7 @@ for prop in $(fs_inherit_prop); do
 done
 
 #
-# Inherit propertes in sub-datasets
+# Inherit properties in sub-datasets
 #
 for ds in "$POOL/$FS/fs1" "$POOL/$FS/fs1/fs2" "$POOL/$FS/fs1/fclone" ; do
 	for prop in $(fs_inherit_prop) ; do
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
index 57d58b9bab77..d85970a74217 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
@@ -39,7 +39,7 @@
 # 1. Setting properties for all the filesystem and volumes randomly
 # 2. Backup all the data from POOL by send -R
 # 3. Restore all the data in POOL2
-# 4. Verify all the perperties in two pools are same
+# 4. Verify all the properties in the two pools are the same
 #
 
 verify_runnable "global"
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh
index 60be67328e1c..cb68b1c3b27d 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh
@@ -25,7 +25,7 @@
 #
 # Strategy:
 # 1. Bookmark a ZFS snapshot
-# 2. Destroy the ZFS sanpshot
+# 2. Destroy the ZFS snapshot
 # 3. Destroy the filesystem for the receive
 # 4. Verify receive of the full send stream
 # 5. Start an incremental ZFS send of the ZFS bookmark, redirect output to a
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh
index 20f0bee15572..2d9fb01af10f 100755
--- a/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh
@@ -25,7 +25,7 @@
 #
 # Strategy:
 # 1. Destroy the filesystem for the receive
-# 2. Unmount the source filsesystem
+# 2. Unmount the source filesystem
 # 3. Start a full ZFS send, redirect output to a file
 # 4.
Mess up the contents of the stream state file on disk # 5. Try ZFS receive, which should fail with a checksum mismatch error diff --git a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh index ceface9dbc09..d0754a4f1aaa 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh @@ -45,7 +45,7 @@ typeset inc=$BACKDIR/stream.inc log_must zfs create -o compress=lz4 $sendfs log_must zfs create -o compress=lz4 $recvfs typeset dir=$(get_prop mountpoint $sendfs) -# Don't use write_compressible: we want compressible but undedupable data here. +# Don't use write_compressible: we want compressible but undeduplicable data. log_must eval "dd if=/dev/urandom bs=1024k count=4 | base64 >$dir/file" log_must zfs snapshot $sendfs@snap0 log_must eval "zfs send -D -c $sendfs@snap0 >$stream0" diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh index d8d7c40e4931..05ba5ed244d9 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_resume.ksh @@ -28,7 +28,7 @@ # 2. Mess up the contents of the stream state file on disk # 3. Try ZFS receive, which should fail with a checksum mismatch error # 4. ZFS send to the stream state file again using the receive_resume_token -# 5. ZFS receieve and verify the receive completes successfully +# 5. ZFS receive and verify the receive completes successfully # 6. Repeat steps on an incremental ZFS send # diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh index 130bc3dbc9c3..f11068192880 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh @@ -89,4 +89,4 @@ for compress in $compress_types; do "$vol_csize and $vol_refer differed by too much" done -log_pass "The the stream size given by -P accounts for compressed send." +log_pass "The stream size given by -P accounts for compressed send." 
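The hunks above and below replace open-coded `md5sum ... | awk '{print $1}'` pipelines with the suite's md5digest helper. As a hedged sketch (the canonical definition lives in tests/zfs-tests/include/libtest.shlib and may differ in detail), the helper reduces to:

# Print only the MD5 digest of a file, or of stdin when no file is given
# (an empty, unquoted $file makes md5sum read standard input).
function md5digest
{
	typeset file=$1
	md5sum -b $file | awk '{ print $1 }'
}

Centralizing the digest logic lets callers such as cmp_md5s compare two digests directly, with no temporary files under $BACKDIR.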
diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh index caaf07ccb7a2..988ed91b9918 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh @@ -49,8 +49,8 @@ typeset megs=8 log_must zfs create -V 256m -o compress=lz4 $vol write_compressible $BACKDIR ${megs}m 2 -md5_1=$(md5sum $data1 | awk '{print $1}') -md5_2=$(md5sum $data2 | awk '{print $1}') +md5_1=$(md5digest $data1) +md5_2=$(md5digest $data2) log_must dd if=$data1 of=$voldev bs=1024k log_must zfs snapshot $vol@snap @@ -60,8 +60,7 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/full" verify_stream_size $BACKDIR/full $vol verify_stream_size $BACKDIR/full $vol2 -md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5sum | \ - awk '{print $1}') +md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest) [[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1" # Repeat, for an incremental send @@ -73,8 +72,7 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/inc" verify_stream_size $BACKDIR/inc $vol 90 $vol@snap verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap -md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5sum | \ - awk '{print $1}') +md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest) [[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2" log_pass "Verify compressed send works with volumes" diff --git a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh index 49b846e9c332..0a7ae74822db 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh @@ -62,7 +62,7 @@ log_must eval "echo 'password' > $keyfile" log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \ -o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL -log_must block_device_wait +block_device_wait log_must eval "echo 'y' | newfs -t ext4 -v $zdev" log_must mkdir -p $mntpnt @@ -82,12 +82,12 @@ done log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile" log_must eval "zfs recv $TESTPOOL/recv < $sendfile" log_must zfs load-key $TESTPOOL/recv -log_must block_device_wait +block_device_wait log_must mount $recvdev $recvmnt -md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}') -md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}') +md5_1=$(cat $mntpnt/* | md5digest) +md5_2=$(cat $recvmnt/* | md5digest) [[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2" log_pass "zfs can receive raw, recursive, and deduplicated send streams" diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh index a216f1c5ff79..8e21acd99d28 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh @@ -58,7 +58,8 @@ log_assert "'zfs recv' must properly handle encryption properties" typeset keyfile=/$TESTPOOL/pkey typeset sendfile=/$TESTPOOL/sendfile -typeset snap=$TESTPOOL/ds@snap +typeset snap=$TESTPOOL/ds@snap1 +typeset snap2=$TESTPOOL/ds@snap2 typeset esnap=$TESTPOOL/crypt@snap1 typeset esnap2=$TESTPOOL/crypt@snap2 @@ -75,9 +76,10 @@ log_must zfs create -o keyformat=passphrase -o keylocation=file://$keyfile \ log_must mkfile 1M /$TESTPOOL/ds/$TESTFILE0 log_must cp 
/$TESTPOOL/ds/$TESTFILE0 /$TESTPOOL/crypt/$TESTFILE0 -typeset cksum=$(md5sum /$TESTPOOL/ds/$TESTFILE0 | awk '{ print $1 }') +typeset cksum=$(md5digest /$TESTPOOL/ds/$TESTFILE0) log_must zfs snap -r $snap +log_must zfs snap -r $snap2 log_must zfs snap -r $esnap log_must zfs snap -r $esnap2 @@ -127,7 +129,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -143,7 +145,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -161,7 +163,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -175,7 +177,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -189,7 +191,21 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5sum /$ds/$TESTFILE0 | awk '{ print $1 }') +recv_cksum=$(md5digest /$ds/$TESTFILE0) +log_must test "$recv_cksum" == "$cksum" +log_must zfs destroy -r $ds + +# Test that we can override an unencrypted, incremental, recursive stream's +# encryption settings, receiving all datasets as encrypted children. +log_note "Must be able to receive recursive stream to encrypted child" +ds=$TESTPOOL/crypt/recv +log_must eval "zfs send -R $snap2 > $sendfile" +log_must eval "zfs recv -x encryption $ds < $sendfile" +log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" +log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" +log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" +log_must test "$(get_prop 'mounted' $ds)" == "yes" +recv_cksum=$(md5digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh index 3c3de86d91c6..83a79784d226 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh @@ -25,7 +25,7 @@ # Strategy: # 1. 
Create a pool containing an encrypted filesystem. # 2. Use 'zfs send -wp' to perform a raw send of the initial filesystem. -# 3. Repeat the followings steps N times to verify raw incremental receives. +# 3. Repeat the following steps N times to verify raw incremental receives. # a) Randomly change several key dataset properties. # b) Modify the contents of the filesystem such that dnode reallocation # is likely during the 'zfs receive', and receive_object() exercises diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh index 4b89a73d8081..8e9db969bc23 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh @@ -15,6 +15,7 @@ # Copyright (c) 2019 by Lawrence Livermore National Security, LLC. # +. $STF_SUITE/include/properties.shlib . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/rsend/rsend.kshlib @@ -25,7 +26,7 @@ # Strategy: # 1. Create a pool containing an encrypted filesystem. # 2. Use 'zfs send -wp' to perform a raw send of the initial filesystem. -# 3. Repeat the followings steps N times to verify raw incremental receives. +# 3. Repeat the following steps N times to verify raw incremental receives. # a) Randomly change several key dataset properties. # b) Modify the contents of the filesystem such that dnode reallocation # is likely during the 'zfs receive', and receive_object() exercises @@ -71,7 +72,7 @@ for i in {1..$passes}; do # Randomly modify several dataset properties in order to generate # more interesting incremental send streams. rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256" - rand_set_prop $POOL/fs compression "off" "lzjb" "gzip" "lz4" + rand_set_prop $POOL/fs compression "${compress_prop_vals[@]}" rand_set_prop $POOL/fs recordsize "32K" "128K" rand_set_prop $POOL/fs dnodesize "legacy" "auto" "4k" rand_set_prop $POOL/fs xattr "on" "sa" diff --git a/tests/zfs-tests/tests/functional/slog/Makefile.am b/tests/zfs-tests/tests/functional/slog/Makefile.am index 4548ce63b40c..33e3a6d3a496 100644 --- a/tests/zfs-tests/tests/functional/slog/Makefile.am +++ b/tests/zfs-tests/tests/functional/slog/Makefile.am @@ -17,7 +17,8 @@ dist_pkgdata_SCRIPTS = \ slog_013_pos.ksh \ slog_014_pos.ksh \ slog_015_neg.ksh \ - slog_replay_fs.ksh \ + slog_replay_fs_001.ksh \ + slog_replay_fs_002.ksh \ slog_replay_volume.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/slog/setup.ksh b/tests/zfs-tests/tests/functional/slog/setup.ksh index f30824d3ee90..8e8d214d823c 100755 --- a/tests/zfs-tests/tests/functional/slog/setup.ksh +++ b/tests/zfs-tests/tests/functional/slog/setup.ksh @@ -38,13 +38,4 @@ if ! verify_slog_support ; then log_unsupported "This system doesn't support separate intent logs" fi -if [[ -d $VDEV ]]; then - log_must rm -rf $VDIR -fi -if [[ -d $VDEV2 ]]; then - log_must rm -rf $VDIR2 -fi -log_must mkdir -p $VDIR $VDIR2 -log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 - log_pass diff --git a/tests/zfs-tests/tests/functional/slog/slog.kshlib b/tests/zfs-tests/tests/functional/slog/slog.kshlib index 6ed7e4e0502f..75cfec2d832d 100644 --- a/tests/zfs-tests/tests/functional/slog/slog.kshlib +++ b/tests/zfs-tests/tests/functional/slog/slog.kshlib @@ -31,11 +31,20 @@ . $STF_SUITE/include/libtest.shlib . 
$STF_SUITE/tests/functional/slog/slog.cfg +function setup +{ + log_must rm -rf $VDIR $VDIR2 + log_must mkdir -p $VDIR $VDIR2 + log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 + + return 0 +} + function cleanup { poolexists $TESTPOOL && destroy_pool $TESTPOOL poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 - rm -rf $TESTDIR + rm -rf $TESTDIR $VDIR $VDIR2 } # diff --git a/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh index 3d3daf5f9ccc..a4c35ed9e98e 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Creating a pool with a log device succeeds." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh index b056f19cdb80..91904aa612d1 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding a log device to normal pool works." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh index c647b8f54b75..0b4d6ede3e13 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding an extra log device works." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh index 4b0b3439a2e3..10f28dcc000b 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Attaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh index cbbb9486913a..4836f6f27937 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Detaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh index 53e8c67ca005..24143196fd2e 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Replacing a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh index 4926fb7b3192..27ac38606c29 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh @@ -48,6 +48,7 @@ verify_runnable "global" log_assert "Exporting and importing pool with log devices passes." 
log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh index 587e0e321222..54587a0c61a7 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log is not supported." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh index e7091f17b759..222f71a99928 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log can not be added to existed pool." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh index 8fe248ffbcba..edd9abea0930 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Slog device can not be replaced with spare device." log_onexit cleanup +log_must setup log_must zpool create $TESTPOOL $VDEV spare $SDEV log $LDEV sdev=$(random_get $SDEV) diff --git a/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh index 2dad200b31c1..3bebc8201713 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Offline and online a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh index 45566d427f1d..8d6fb2bffb7f 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Pool can survive when one of mirror log device get corrupted." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh index bbe5adc24174..d6917065ddbf 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh @@ -60,6 +60,7 @@ log_assert "Verify slog device can be disk, file, lofi device or any device " \ "that presents a block interface." verify_disk_count "$DISKS" 2 log_onexit cleanup_testenv +log_must setup dsk1=${DISKS%% *} log_must zpool create $TESTPOOL ${DISKS#$dsk1} diff --git a/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh index 0ec96ae1e6f7..e8ea29f1ffa3 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "log device can survive when one of the pool device get corrupted." 
+log_must setup for type in "mirror" "raidz" "raidz2"; do for spare in "" "spare"; do diff --git a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh index 37821888ea00..a53aeabffcdd 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh @@ -26,7 +26,7 @@ # 3. Concurrently do the following: # 3.1. Perform 8K sync writes # 3.2. Perform log offline/online commands -# 4. Loop to test with growing "zfs_commit_timout_pct" values. +# 4. Loop to test with growing "zfs_commit_timeout_pct" values. # verify_runnable "global" @@ -47,6 +47,7 @@ function cleanup ORIG_TIMEOUT=$(get_tunable zfs_commit_timeout_pct | tail -1 | awk '{print $NF}') log_onexit cleanup +log_must setup for PCT in 0 1 2 4 8 16 32 64 128 256 512 1024; do log_must set_tunable64 zfs_commit_timeout_pct $PCT diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh similarity index 91% rename from tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh rename to tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 5f281a756f15..8954caa1c933 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -58,14 +58,9 @@ verify_runnable "global" -function cleanup_fs -{ - rm -f $TESTDIR/checksum - cleanup -} - log_assert "Replay of intent log succeeds." -log_onexit cleanup_fs +log_onexit cleanup +log_must setup # # 1. Create an empty file system (TESTFS) @@ -114,7 +109,7 @@ log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete # Create a simple validation payload log_must mkdir -p $TESTDIR log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8 -log_must eval "sha256sum -b /$TESTPOOL/$TESTFS/payload >$TESTDIR/checksum" +typeset checksum=$(sha256digest /$TESTPOOL/$TESTFS/payload) # TX_WRITE (small file with ordering) log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file @@ -160,6 +155,14 @@ log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file +# TX_WRITE, TX_LINK, TX_REMOVE +# Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \ + count=8 +log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ + /$TESTPOOL/$TESTFS/link_and_unlink.link +log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link + # # 4. Copy TESTFS to temporary location (TESTDIR/copy) # @@ -201,6 +204,8 @@ log_note "Verify working set diff:" log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy log_note "Verify file checksum:" -log_must sha256sum -c $TESTDIR/checksum +typeset checksum1=$(sha256digest /$TESTPOOL/$TESTFS/payload) +[[ "$checksum1" == "$checksum" ]] || \ + log_fail "checksum mismatch ($checksum1 != $checksum)" log_pass "Replay of intent log succeeds." diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh new file mode 100755 index 000000000000..3c3ccdf4ad23 --- /dev/null +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh @@ -0,0 +1,137 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/tests/functional/slog/slog.kshlib
+
+#
+# DESCRIPTION:
+# Verify that slog replay works correctly when TX_REMOVEs are followed by
+# TX_CREATEs.
+#
+# STRATEGY:
+# 1. Create a file system (TESTFS) with a lot of files
+# 2. Freeze TESTFS
+# 3. Remove all files then create a lot of files
+# 4. Copy TESTFS to temporary location (TESTDIR/copy)
+# 5. Unmount filesystem
+#
+# 6. Remount TESTFS
+# 7. Compare TESTFS against the TESTDIR/copy
+#
+
+verify_runnable "global"
+
+function cleanup_fs
+{
+	cleanup
+}
+
+log_assert "Replay of intent log succeeds."
+log_onexit cleanup_fs
+log_must setup
+
+#
+# 1. Create a file system (TESTFS) with a lot of files
+#
+log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
+log_must zfs set compression=on $TESTPOOL
+log_must zfs create $TESTPOOL/$TESTFS
+
+# Prep for the test of TX_REMOVE followed by TX_CREATE
+dnsize=(legacy auto 1k 2k 4k 8k 16k)
+NFILES=200
+log_must mkdir /$TESTPOOL/$TESTFS/dir0
+log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done'
+
+#
+# Reimport to reset dnode allocation pointer.
+# This is to make sure we will have TX_REMOVE and TX_CREATE on the same
+# object id.
+#
+log_must zpool export $TESTPOOL
+log_must zpool import -f -d $VDIR $TESTPOOL
+
+#
+# This dd command works around an issue where ZIL records aren't created
+# after freezing the pool unless a ZIL header already exists. Create a file
+# synchronously to force ZFS to write one out.
+#
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \
+    conv=fdatasync,fsync bs=1 count=1
+
+#
+# 2. Freeze TESTFS
+#
+log_must zpool freeze $TESTPOOL
+
+#
+# 3. Remove all files then create a lot of files
+#
+# TX_REMOVE followed by TX_CREATE
+log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*'
+log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done'
+
+#
+# 4. Copy TESTFS to temporary location (TESTDIR/copy)
+#
+log_must mkdir -p $TESTDIR/copy
+log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/
+
+#
+# 5. Unmount filesystem and export the pool
+#
+# At this stage TESTFS is empty again and frozen, the intent log contains
+# a complete set of deltas to replay.
+#
+log_must zfs unmount /$TESTPOOL/$TESTFS
+
+log_note "Verify transactions to replay:"
+log_must zdb -iv $TESTPOOL/$TESTFS
+
+log_must zpool export $TESTPOOL
+
+#
+# 6. Remount TESTFS
+#
+# Import the pool to unfreeze it and claim log blocks. It has to be
+# `zpool import -f` because we can't write a frozen pool's labels!
+#
+log_must zpool import -f -d $VDIR $TESTPOOL
+
+#
+# 7.
Compare TESTFS against the TESTDIR/copy +# +log_note "Verify current block usage:" +log_must zdb -bcv $TESTPOOL + +log_note "Verify number of files" +log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq $NFILES + +log_note "Verify working set diff:" +log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy + +log_pass "Replay of intent log succeeds." diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh index 2cdcb38dc257..f513d04fe189 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh @@ -76,6 +76,7 @@ function cleanup_volume log_assert "Replay of intent log succeeds." log_onexit cleanup_volume +log_must setup # # 1. Create an empty volume (TESTVOL), set sync=always, and format @@ -86,7 +87,7 @@ log_must zfs create -V 128M $TESTPOOL/$TESTVOL log_must zfs set compression=on $TESTPOOL/$TESTVOL log_must zfs set sync=always $TESTPOOL/$TESTVOL log_must mkdir -p $TESTDIR -log_must block_device_wait +block_device_wait echo "y" | newfs -t ext4 -v $VOLUME log_must mkdir -p $MNTPNT log_must mount -o discard $VOLUME $MNTPNT @@ -127,7 +128,7 @@ fi # # 4. Generate checksums for all ext4 files. # -log_must sha256sum -b $MNTPNT/* >$TESTDIR/checksum +typeset checksum=$(cat $MNTPNT/* | sha256digest) # # 5. Unmount filesystem and export the pool @@ -149,7 +150,7 @@ log_must zpool export $TESTPOOL # `zpool import -f` because we can't write a frozen pool's labels! # log_must zpool import -f $TESTPOOL -log_must block_device_wait +block_device_wait log_must mount $VOLUME $MNTPNT # @@ -159,6 +160,8 @@ log_note "Verify current block usage:" log_must zdb -bcv $TESTPOOL log_note "Verify checksums" -log_must sha256sum -c $TESTDIR/checksum +typeset checksum1=$(cat $MNTPNT/* | sha256digest) +[[ "$checksum1" == "$checksum" ]] || \ + log_fail "checksum mismatch ($checksum1 != $checksum)" log_pass "Replay of intent log succeeds." diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh index b404ffbd50e6..124a7db9c6e6 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh @@ -36,7 +36,7 @@ # DESCRIPTION: # An archive of a zfs file system and an archive of its snapshot # is identical even though the original file system has -# changed sinced the snapshot was taken. +# changed since the snapshot was taken. # # STRATEGY: # 1) Create files in all of the zfs file systems diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh index dc50e46933aa..68a616c02a6c 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh @@ -35,7 +35,7 @@ # # DESCRIPTION: # An archive of a zfs dataset and an archive of its snapshot -# changed sinced the snapshot was taken. +# changed since the snapshot was taken. 
# # STRATEGY: # 1) Create some files in a ZFS dataset diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh index 6607d4ca4974..1ee7e33c2ac2 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh @@ -88,7 +88,7 @@ else fi log_must zfs snapshot -r $snappool -log_must block_device_wait +block_device_wait #verify the snapshot -r results for snap in $snappool $snapfs $snapvol $snapctr $snapctrvol \ diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh index 0f876ad6d61e..128b443c6fc9 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh @@ -83,7 +83,7 @@ else fi log_must zfs snapshot -r $snappool -log_must block_device_wait +block_device_wait #select the $TESTCTR as destroy point, $TESTCTR is a child of $TESTPOOL log_must zfs destroy -r $snapctr @@ -92,7 +92,7 @@ for snap in $snapctr $snapctrvol $snapctrclone $snapctrfs; do log_fail "The snapshot $snap is not destroyed correctly." done -for snap in $snappool $snapfs $snapvol $ctrfs@$TESTSNAP1;do +for snap in $snappool $snapfs $snapvol $ctrfs@$TESTSNAP1; do ! snapexists $snap && \ log_fail "The snapshot $snap should be not destroyed." done diff --git a/tests/zfs-tests/tests/functional/suid/.gitignore b/tests/zfs-tests/tests/functional/suid/.gitignore new file mode 100644 index 000000000000..a9a3db79ba44 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/.gitignore @@ -0,0 +1 @@ +/suid_write_to_file diff --git a/tests/zfs-tests/tests/functional/suid/Makefile.am b/tests/zfs-tests/tests/functional/suid/Makefile.am new file mode 100644 index 000000000000..594d2b77ca8e --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/Makefile.am @@ -0,0 +1,16 @@ +include $(top_srcdir)/config/Rules.am + +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/suid + +dist_pkgdata_SCRIPTS = \ + suid_write_to_suid.ksh \ + suid_write_to_sgid.ksh \ + suid_write_to_suid_sgid.ksh \ + suid_write_to_none.ksh \ + cleanup.ksh \ + setup.ksh + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/suid + +pkgexec_PROGRAMS = suid_write_to_file +suid_write_to_file_SOURCES = suid_write_to_file.c diff --git a/tests/zfs-tests/tests/functional/suid/cleanup.ksh b/tests/zfs-tests/tests/functional/suid/cleanup.ksh new file mode 100755 index 000000000000..6e41e02faf58 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/suid/setup.ksh b/tests/zfs-tests/tests/functional/suid/setup.ksh
new file mode 100755
index 000000000000..d04d5568c003
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/suid/setup.ksh
@@ -0,0 +1,35 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+default_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c
new file mode 100644
index 000000000000..571dc553bec2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c
@@ -0,0 +1,133 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+static void
+test_stat_mode(mode_t extra)
+{
+	struct stat st;
+	int i, fd;
+	char fpath[1024];
+	char *penv[] = {"TESTDIR", "TESTFILE0"};
+	char buf[] = "test";
+	mode_t res;
+	mode_t mode = 0777 | extra;
+
+	/*
+	 * Get the environment variable values.
+	 */
+	for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
+		if ((penv[i] = getenv(penv[i])) == NULL) {
+			fprintf(stderr, "getenv(penv[%d])\n", i);
+			exit(1);
+		}
+	}
+
+	umask(0);
+	if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) {
+		perror("mkdir");
+		exit(2);
+	}
+
+	snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]);
+	unlink(fpath);
+	if (stat(fpath, &st) == 0) {
+		fprintf(stderr, "%s exists\n", fpath);
+		exit(3);
+	}
+
+	fd = creat(fpath, mode);
+	if (fd == -1) {
+		perror("creat");
+		exit(4);
+	}
+	close(fd);
+
+	if (setuid(65534) == -1) {
+		perror("setuid");
+		exit(5);
+	}
+
+	fd = open(fpath, O_RDWR);
+	if (fd == -1) {
+		perror("open");
+		exit(6);
+	}
+
+	if (write(fd, buf, sizeof (buf)) == -1) {
+		perror("write");
+		exit(7);
+	}
+	close(fd);
+
+	if (stat(fpath, &st) == -1) {
+		perror("stat");
+		exit(8);
+	}
+	unlink(fpath);
+
+	/* Verify SUID/SGID are dropped */
+	res = st.st_mode & (0777 | S_ISUID | S_ISGID);
+	if (res != (mode & 0777)) {
+		fprintf(stderr, "stat(2) %o\n", res);
+		exit(9);
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	const char *name;
+	mode_t extra;
+
+	if (argc < 2) {
+		fprintf(stderr, "Invalid argc\n");
+		exit(1);
+	}
+
+	name = argv[1];
+	if (strcmp(name, "SUID") == 0) {
+		extra = S_ISUID;
+	} else if (strcmp(name, "SGID") == 0) {
+		extra = S_ISGID;
+	} else if (strcmp(name, "SUID_SGID") == 0) {
+		extra = S_ISUID | S_ISGID;
+	} else if (strcmp(name, "NONE") == 0) {
+		extra = 0;
+	} else {
+		fprintf(stderr, "Invalid name %s\n", name);
+		exit(1);
+	}
+
+	test_stat_mode(extra);
+
+	return (0);
+}
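For orientation between the C helper above and the ksh wrappers below: the binary takes one mode argument and finds its target via the environment. A hypothetical standalone invocation (the paths are illustrative; the real values are exported by the test framework):

	export TESTDIR=/testpool/testfs   # illustrative value
	export TESTFILE0=testfile0        # illustrative value
	./suid_write_to_file SUID         # accepts SUID, SGID, SUID_SGID, or NONE
	echo $?                           # 0 on success; codes 1-9 mark the failing step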
diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh
new file mode 100755
index 000000000000..dd01978619f9
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh
@@ -0,0 +1,52 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify write(2) to regular file by non-owner.
+# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t
+#
+# STRATEGY:
+# 1. creat(2) a file.
+# 2. write(2) to the file with uid=65534.
+# 3. stat(2) the file and verify .st_mode value.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -f $TESTDIR/$TESTFILE0
+}
+
+log_onexit cleanup
+log_note "Verify write(2) to regular file by non-owner"
+
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE"
+
+log_pass "Verify write(2) to regular file by non-owner passed"
diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh
new file mode 100755
index 000000000000..49ae2bd1b31e
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh
@@ -0,0 +1,52 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify write(2) to SGID file by non-owner.
+# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t
+#
+# STRATEGY:
+# 1. creat(2) a file with SGID.
+# 2. write(2) to the file with uid=65534.
+# 3. stat(2) the file and verify .st_mode value.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -f $TESTDIR/$TESTFILE0
+}
+
+log_onexit cleanup
+log_note "Verify write(2) to SGID file by non-owner"
+
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID"
+
+log_pass "Verify write(2) to SGID file by non-owner passed"
diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh
new file mode 100755
index 000000000000..3983aad2e51d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh
@@ -0,0 +1,52 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify write(2) to SUID file by non-owner.
+# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t
+#
+# STRATEGY:
+# 1. creat(2) a file with SUID.
+# 2. write(2) to the file with uid=65534.
+# 3. stat(2) the file and verify .st_mode value.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -f $TESTDIR/$TESTFILE0
+}
+
+log_onexit cleanup
+log_note "Verify write(2) to SUID file by non-owner"
+
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID"
+
+log_pass "Verify write(2) to SUID file by non-owner passed"
diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh
new file mode 100755
index 000000000000..a058c7e7d4bc
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh
@@ -0,0 +1,52 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify write(2) to SUID/SGID file by non-owner.
+# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t
+#
+# STRATEGY:
+# 1. creat(2) a file with SUID/SGID.
+# 2. write(2) to the file with uid=65534.
+# 3. stat(2) the file and verify .st_mode value.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -f $TESTDIR/$TESTFILE0
+}
+
+log_onexit cleanup
+log_note "Verify write(2) to SUID/SGID file by non-owner"
+
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID"
+
+log_pass "Verify write(2) to SUID/SGID file by non-owner passed"
diff --git a/tests/zfs-tests/tests/functional/tmpfile/.gitignore b/tests/zfs-tests/tests/functional/tmpfile/.gitignore
index b7a19481ad29..de014c5256ce 100644
--- a/tests/zfs-tests/tests/functional/tmpfile/.gitignore
+++ b/tests/zfs-tests/tests/functional/tmpfile/.gitignore
@@ -2,3 +2,4 @@
 /tmpfile_001_pos
 /tmpfile_002_pos
 /tmpfile_003_pos
+/tmpfile_stat_mode
diff --git a/tests/zfs-tests/tests/functional/tmpfile/Makefile.am b/tests/zfs-tests/tests/functional/tmpfile/Makefile.am
index 411445217a6d..35a1f44c1693 100644
--- a/tests/zfs-tests/tests/functional/tmpfile/Makefile.am
+++ b/tests/zfs-tests/tests/functional/tmpfile/Makefile.am
@@ -8,7 +8,8 @@ dist_pkgdata_SCRIPTS = \

 pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/tmpfile

-pkgexec_PROGRAMS = tmpfile_test tmpfile_001_pos tmpfile_002_pos tmpfile_003_pos
+pkgexec_PROGRAMS = tmpfile_test tmpfile_001_pos tmpfile_002_pos \
+	tmpfile_003_pos tmpfile_stat_mode
 tmpfile_test_SOURCES= tmpfile_test.c
 tmpfile_001_pos_SOURCES = tmpfile_001_pos.c
 tmpfile_002_pos_SOURCES = tmpfile_002_pos.c
diff --git a/tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c
new file mode 100644
index 000000000000..bf71d429c3fd
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_stat_mode.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/* backward compat in case it's not defined */
+#ifndef O_TMPFILE
+#define	O_TMPFILE	(020000000|O_DIRECTORY)
+#endif
+
+/*
+ * DESCRIPTION:
+ *	Verify stat(2) for O_TMPFILE file considers umask.
+ *
+ * STRATEGY:
+ *	1. open(2) with O_TMPFILE.
+ *	2. linkat(2).
+ *	3. fstat(2)/stat(2) and verify .st_mode value.
+ */
+
+static void
+test_stat_mode(mode_t mask)
+{
+	struct stat st, fst;
+	int i, fd;
+	char spath[1024], dpath[1024];
+	char *penv[] = {"TESTDIR", "TESTFILE0"};
+	mode_t masked = 0777 & ~mask;
+	mode_t mode;
+
+	/*
+	 * Get the environment variable values.
+	 */
+	for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
+		if ((penv[i] = getenv(penv[i])) == NULL) {
+			fprintf(stderr, "getenv(penv[%d])\n", i);
+			exit(1);
+		}
+	}
+
+	umask(mask);
+	fd = open(penv[0], O_RDWR|O_TMPFILE, 0777);
+	if (fd == -1) {
+		perror("open");
+		exit(2);
+	}
+
+	if (fstat(fd, &fst) == -1) {
+		perror("fstat");
+		close(fd);
+		exit(3);
+	}
+
+	snprintf(spath, sizeof (spath), "/proc/self/fd/%d", fd);
+	snprintf(dpath, sizeof (dpath), "%s/%s", penv[0], penv[1]);
+
+	unlink(dpath);
+	if (linkat(AT_FDCWD, spath, AT_FDCWD, dpath, AT_SYMLINK_FOLLOW) == -1) {
+		perror("linkat");
+		close(fd);
+		exit(4);
+	}
+	close(fd);
+
+	if (stat(dpath, &st) == -1) {
+		perror("stat");
+		exit(5);
+	}
+	unlink(dpath);
+
+	/* Verify fstat(2) result */
+	mode = fst.st_mode & 0777;
+	if (mode != masked) {
+		fprintf(stderr, "fstat(2) %o != %o\n", mode, masked);
+		exit(6);
+	}
+
+	/* Verify stat(2) result */
+	mode = st.st_mode & 0777;
+	if (mode != masked) {
+		fprintf(stderr, "stat(2) %o != %o\n", mode, masked);
+		exit(7);
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	fprintf(stdout, "Verify stat(2) for O_TMPFILE file considers umask.\n");
+
+	test_stat_mode(0022);
+	test_stat_mode(0077);
+
+	return (0);
+}
diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
index 6ce396a38012..e41e325687d4 100755
--- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
+++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
@@ -89,7 +89,7 @@ for type in "" "mirror" "raidz2"; do

	# Fill the pool, verify the vdevs are no longer sparse.
	file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R
-	verify_vdevs "-gt" "$VDEV_MAX_MB" $VDEVS
+	verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS

	# Remove the file, wait for trim, verify the vdevs are now sparse.
	log_must rm /$TESTPOOL/file
diff --git a/tests/zfs-tests/tests/functional/trim/trim.kshlib b/tests/zfs-tests/tests/functional/trim/trim.kshlib
index 02802d8c91bf..ed6a8f91b970 100644
--- a/tests/zfs-tests/tests/functional/trim/trim.kshlib
+++ b/tests/zfs-tests/tests/functional/trim/trim.kshlib
@@ -18,7 +18,7 @@
 . $STF_SUITE/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib

 #
-# Get the actual on disk disk for the provided file.
+# Get the actual size on disk for the provided file.
 #
 function get_size_mb
 {
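On the "-gt" to "-ge" relaxation in autotrim_config.ksh above (trim_config.ksh below receives the identical fix): a pool filled to capacity can leave a file vdev sitting at exactly $VDEV_MAX_MB, so the strict comparison failed intermittently. Roughly the check being relaxed, assuming verify_vdevs applies the given test(1) operator to each vdev's on-disk size:

	# sketch only; verify_vdevs internals assumed from its usage here
	for vdev in $VDEVS; do
		typeset size_mb=$(get_size_mb $vdev)
		log_must test $size_mb -ge $VDEV_MAX_MB   # was -gt: exact fill failed
	done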
diff --git a/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/tests/zfs-tests/tests/functional/trim/trim_config.ksh
index e56bd6248f2c..993072b1084f 100755
--- a/tests/zfs-tests/tests/functional/trim/trim_config.ksh
+++ b/tests/zfs-tests/tests/functional/trim/trim_config.ksh
@@ -88,7 +88,7 @@ for type in "" "mirror" "raidz2"; do

	# Fill the pool, verify the vdevs are no longer sparse.
	file_write -o create -f /$TESTPOOL/file -b 1048576 -c $fill_mb -d R
-	verify_vdevs "-gt" "$VDEV_MAX_MB" $VDEVS
+	verify_vdevs "-ge" "$VDEV_MAX_MB" $VDEVS

	# Remove the file, issue trim, verify the vdevs are now sparse.
	log_must rm /$TESTPOOL/file
diff --git a/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib b/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib
index 679ff3049234..6ffd85b5b1b6 100644
--- a/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib
+++ b/tests/zfs-tests/tests/functional/upgrade/upgrade_common.kshlib
@@ -34,8 +34,8 @@ export TMPDEV=$TEST_BASE_DIR/zpool_upgrade_test.dat

 function cleanup_upgrade
 {
-	datasetexists $TESTPOOL/fs1 && log_must zfs destroy $TESTPOOL/fs1
-	datasetexists $TESTPOOL/fs2 && log_must zfs destroy $TESTPOOL/fs2
-	datasetexists $TESTPOOL/fs3 && log_must zfs destroy $TESTPOOL/fs3
-	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+	destroy_dataset "$TESTPOOL/fs1"
+	destroy_dataset "$TESTPOOL/fs2"
+	destroy_dataset "$TESTPOOL/fs3"
+	destroy_pool "$TESTPOOL"
 }
diff --git a/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh b/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh
index 825ebe09b28b..5684b05b7e4b 100755
--- a/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh
+++ b/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh
@@ -64,7 +64,7 @@ for user in "${no_users[@]}"; do
	log_mustnot zfs set userquota@$user=100m $QFS
 done

-log_note "can set all numberic id even that id is not existed"
+log_note "can set all numeric id even if that id does not exist"
 log_must zfs set userquota@12345678=100m $QFS
 log_mustnot zfs set userquota@12345678=100m $snap_fs
diff --git a/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh b/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh
index 08af6560dc87..20c9c56ba5ef 100755
--- a/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh
+++ b/tests/zfs-tests/tests/functional/userquota/userquota_010_pos.ksh
@@ -33,7 +33,7 @@
 #
 # DESCRIPTION:
-#	Check userquota and groupquota be overwrited at same time
+#	Check userquota and groupquota being exceeded at the same time
 #
 #
 # STRATEGY:
diff --git a/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh b/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh
index 088499eb0426..b553f91d40da 100755
--- a/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh
+++ b/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh
@@ -56,7 +56,7 @@ log_onexit cleanup
 typeset snap_fs=$QFS@snap

 log_assert "Check set userquota and groupquota on snapshot"
-log_note "Check can not set user|group quuota on snapshot"
+log_note "Check can not set user|group quota on snapshot"
 log_must zfs snapshot $snap_fs
 log_mustnot zfs set userquota@$QUSER1=$UQUOTA_SIZE $snap_fs
diff --git a/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh b/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh
index 8cf8e6d4055b..066be917e436 100755
--- a/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh
+++ b/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh
@@ -41,6 +41,7 @@ orig_top=$(get_top_vd_zap $DISK $conf)
 orig_leaf=$(get_leaf_vd_zap $DISK $conf)
 assert_zap_common $TESTPOOL $DISK "top" $orig_top
 assert_zap_common $TESTPOOL $DISK "leaf" $orig_leaf
+log_must zpool sync

 # Export the pool.
 log_must zpool export $TESTPOOL
diff --git a/tests/zfs-tests/tests/functional/xattr/cleanup.ksh b/tests/zfs-tests/tests/functional/xattr/cleanup.ksh
index 5090906199ce..b3629629c78c 100755
--- a/tests/zfs-tests/tests/functional/xattr/cleanup.ksh
+++ b/tests/zfs-tests/tests/functional/xattr/cleanup.ksh
@@ -30,9 +30,6 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/xattr/xattr_common.kshlib

-del_user $ZFS_USER
-del_group $ZFS_GROUP
-
 USES_NIS=$(cat $TEST_BASE_DIR/zfs-xattr-test-nis.txt)
 rm $TEST_BASE_DIR/zfs-xattr-test-nis.txt

@@ -41,4 +38,9 @@ then
	svcadm enable svc:/network/nis/client:default
 fi

-default_cleanup
+default_cleanup_noexit
+
+del_user $ZFS_USER
+del_group $ZFS_GROUP
+
+log_pass
diff --git a/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh b/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh
index a56fce4eaba1..0a661e935b78 100755
--- a/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh
+++ b/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh
@@ -37,8 +37,8 @@
 # should fail.
 #
 # STRATEGY:
-#	1. Create a file, and set an with an xattr
-#	2. Set the octal file permissions to 000 on the file.
+#	1. Create a file with an xattr
+#	2. Set the file permissions to 000
 #	3. Check that we're unable to read the xattr as a non-root user
 #	4. Check that we're unable to write an xattr as a non-root user
 #
diff --git a/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh b/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh
index 80704fad75e1..246f077af0a8 100755
--- a/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh
+++ b/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh
@@ -135,7 +135,7 @@ else
 fi

 log_note "Checking mv"
-# mv doesn't have any flags to preserve/ommit xattrs - they're
+# mv doesn't have any flags to preserve/omit xattrs - they're
 # always moved.
 log_must touch $TESTDIR/mvfile.$$
 create_xattr $TESTDIR/mvfile.$$ passwd /etc/passwd
diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh
index 256ca53241bf..9ccf3f9ded55 100755
--- a/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh
+++ b/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh
@@ -40,7 +40,7 @@
 #
 # STRATEGY:
 # 1. Modify /etc/vfstab to add the test zvol as swap device.
-# 2. Use /sbin/swapadd to add zvol as swap device throuth /etc/vfstab
+# 2. Use /sbin/swapadd to add zvol as swap device through /etc/vfstab
 # 3. Create a file under /tmp and verify the file
 #
diff --git a/tests/zfs-tests/tests/perf/Makefile.am b/tests/zfs-tests/tests/perf/Makefile.am
index 68dd31ec12b1..294b136b3854 100644
--- a/tests/zfs-tests/tests/perf/Makefile.am
+++ b/tests/zfs-tests/tests/perf/Makefile.am
@@ -1,5 +1,5 @@
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/perf

-dist_pkgdata_SCRIPTS = \
+dist_pkgdata_DATA = \
	nfs-sample.cfg \
	perf.shlib
diff --git a/tests/zfs-tests/tests/perf/perf.shlib b/tests/zfs-tests/tests/perf/perf.shlib
index 69e61e9fd122..e2e84ca02acc 100644
--- a/tests/zfs-tests/tests/perf/perf.shlib
+++ b/tests/zfs-tests/tests/perf/perf.shlib
@@ -373,6 +373,23 @@ function get_directory
	echo $directory
 }

+function get_min_arc_size
+{
+	if is_linux; then
+		typeset -l min_arc_size=`awk '$1 == "c_min" { print $3 }' \
+		    /proc/spl/kstat/zfs/arcstats`
+	else
+		typeset -l min_arc_size=$(dtrace -qn 'BEGIN {
+		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
+		    exit(0);
+		}')
+	fi
+
+	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"
+
+	echo $min_arc_size
+}
+
 function get_max_arc_size
 {
	if is_linux; then
diff --git a/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh b/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh
index 38c0669f69eb..e368ed236779 100755
--- a/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh
+++ b/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh
@@ -1,4 +1,4 @@
-#!/usr/bin/ksh
+#!/bin/ksh
 # file and its contents are supplied under the terms of the
 # Common Development and Distribution License ("CDDL"), version 1.0.
 # You may only use this file in accordance with the terms of version
diff --git a/udev/rules.d/69-vdev.rules.in b/udev/rules.d/69-vdev.rules.in
index 36a1a8ed5451..e0f23efc7283 100644
--- a/udev/rules.d/69-vdev.rules.in
+++ b/udev/rules.d/69-vdev.rules.in
@@ -6,6 +6,7 @@
 ENV{DEVTYPE}=="disk", IMPORT{program}="@udevdir@/vdev_id -d %k"
 ENV{DEVTYPE}=="partition", IMPORT{program}="@udevdir@/vdev_id -d %k"

 KERNEL=="*[!0-9]", ENV{SUBSYSTEM}=="block", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}"
+KERNEL=="nvme*[0-9]n*[0-9]", ENV{SUBSYSTEM}=="block", ENV{DEVTYPE}=="disk", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}"
 KERNEL=="*[0-9]", ENV{SUBSYSTEM}=="block", ENV{DEVTYPE}=="partition", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}-part%n"
 KERNEL=="dm-[0-9]*", ENV{SUBSYSTEM}=="block", ENV{ID_VDEV}=="?*", SYMLINK+="$env{ID_VDEV_PATH}"
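A note on the new nvme rule above: NVMe whole-disk nodes (nvme0n1) end in a digit, so the generic whole-disk match KERNEL=="*[!0-9]" can never fire for them and a dedicated glob is needed. The glob alone would also match partition names, which is why the rule additionally pins ENV{DEVTYPE}=="disk". A hedged ksh sketch demonstrating the overlap:

	# Both the disk and its partition satisfy the glob; udev's DEVTYPE
	# test is what keeps the disk and partition rules disjoint.
	for dev in nvme0n1 nvme0n1p2 sda sda1; do
		[[ $dev == nvme*[0-9]n*[0-9] ]] && echo "$dev matches the nvme glob"
	done
	# prints: nvme0n1 and nvme0n1p2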