From 15a66fdde289cf7d2f260f8eb277d5057d5820f7 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Tue, 27 Jun 2017 11:05:01 -0700 Subject: [PATCH 1/2] Add pool prop `partition` to disable auto partition zfsonlinux always partition disk when it detects the device given is a whole disk. This legacy behavior from Illumos, however, has no apparent benefit on Linux, but has some down sides besides confusion. E.g. autoexpand, switching to dm device requires partprobe. We add a pool property `partition` to be set during pool create. It currently has two values, legacy and raw. When setting it to legacy, it will behave as it did. When setiing it to raw, it will always use the device as is without partitioning even if it's a whole disk. This property applies to all commands that add disks to pool, so zpool add/attach/replace will partition or not partition based on the property on the target pool. A pool without this property will be treated as legacy. Newly created pool will by default have partition=legacy. Signed-off-by: Chunwei Chen --- cmd/zed/agents/zfs_mod.c | 8 +++-- cmd/zpool/zpool_vdev.c | 60 ++++++++++++++++++++++++++----------- include/sys/fs/zfs.h | 6 ++++ lib/libzfs/libzfs_pool.c | 29 +++++++++++------- lib/libzfs/libzfs_util.c | 12 ++++---- module/zcommon/zpool_prop.c | 10 +++++++ 6 files changed, 89 insertions(+), 36 deletions(-) diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index a906decab406..54a5c54db382 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -192,6 +192,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) int ret; int is_dm = 0; int is_sd = 0; + uint64_t part; uint_t c; vdev_stat_t *vs; @@ -223,6 +224,9 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) physpath ? physpath : "NULL", wholedisk, is_dm, (long long unsigned int)guid); + /* Find out the partition type */ + part = zpool_get_prop_int(zhp, ZPOOL_PROP_PARTITION, NULL); + /* * The VDEV guid is preferred for identification (gets passed in path) */ @@ -234,7 +238,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) * otherwise use path sans partition suffix for whole disks */ (void) strlcpy(fullpath, path, sizeof (fullpath)); - if (wholedisk) { + if (wholedisk && part != ZPOOL_PARTITION_RAW) { char *spath = zfs_strip_partition(fullpath); if (!spath) { zed_log_msg(LOG_INFO, "%s: Can't alloc", @@ -309,7 +313,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) nvlist_lookup_string(vdev, "new_devid", &new_devid); - if (is_dm) { + if (is_dm || part == ZPOOL_PARTITION_RAW) { /* Don't label device mapper or multipath disks. */ } else if (!labeled) { /* diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 4c3793d6eb58..39a5aeec63c8 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -1188,7 +1188,7 @@ zero_label(char *path) * need to get the devid after we label the disk. */ static int -make_disks(zpool_handle_t *zhp, nvlist_t *nv) +make_disks(zpool_handle_t *zhp, nvlist_t *props, nvlist_t *nv) { nvlist_t **child; uint_t c, children; @@ -1196,6 +1196,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) char devpath[MAXPATHLEN]; char udevpath[MAXPATHLEN]; uint64_t wholedisk; + uint64_t part = ZPOOL_PARTITION_LEGACY; struct stat64 statbuf; int is_exclusive = 0; int fd; @@ -1203,6 +1204,23 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + /* + * Find out the partition type. If zhp exists, this is an existing + * pool, get it from pool property. Otherwise, this is pool creation, + * get it from cmdline from props. + */ + if (zhp) + part = zpool_get_prop_int(zhp, ZPOOL_PROP_PARTITION, NULL); + else if (props) { + char *strval; + uint64_t index; + if (nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_PARTITION), &strval) == 0 && + zpool_prop_string_to_index(ZPOOL_PROP_PARTITION, strval, + &index) == 0) + part = index; + } + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { @@ -1210,7 +1228,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) return (0); /* - * We have a disk device. If this is a whole disk write + * We have a disk device. If this is a legacy whole disk write * out the efi partition table, otherwise write zero's to * the first 4k of the partition. This is to ensure that * libblkid will not misidentify the partition due to a @@ -1220,11 +1238,11 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk)); - if (!wholedisk) { + if (!wholedisk || part == ZPOOL_PARTITION_RAW) { /* * Update device id string for mpath nodes (Linux only) */ - if (is_mpath_whole_disk(path)) + if (wholedisk || is_mpath_whole_disk(path)) update_vdev_config_dev_strs(nv); if (!is_spare(NULL, path)) @@ -1319,19 +1337,19 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) } for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + if ((ret = make_disks(zhp, props, child[c])) != 0) return (ret); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + if ((ret = make_disks(zhp, props, child[c])) != 0) return (ret); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + if ((ret = make_disks(zhp, props, child[c])) != 0) return (ret); return (0); @@ -1342,8 +1360,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) * the majority of this task. */ static boolean_t -is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, - boolean_t replacing, boolean_t isspare) +is_device_in_use(zpool_handle_t *zhp, nvlist_t *config, nvlist_t *nv, + boolean_t force, boolean_t replacing, boolean_t isspare) { nvlist_t **child; uint_t c, children; @@ -1369,8 +1387,13 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, * regardless of what libblkid or zpool_in_use() says. */ if (replacing) { + uint64_t part = ZPOOL_PARTITION_LEGACY; + if (zhp) + part = zpool_get_prop_int(zhp, + ZPOOL_PROP_PARTITION, NULL); + (void) strlcpy(buf, path, sizeof (buf)); - if (wholedisk) { + if (wholedisk && part == ZPOOL_PARTITION_LEGACY) { ret = zfs_append_partition(buf, sizeof (buf)); if (ret == -1) return (-1); @@ -1390,22 +1413,22 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, } for (c = 0; c < children; c++) - if (is_device_in_use(config, child[c], force, replacing, + if (is_device_in_use(zhp, config, child[c], force, replacing, B_FALSE)) anyinuse = B_TRUE; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) for (c = 0; c < children; c++) - if (is_device_in_use(config, child[c], force, replacing, - B_TRUE)) + if (is_device_in_use(zhp, config, child[c], force, + replacing, B_TRUE)) anyinuse = B_TRUE; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) for (c = 0; c < children; c++) - if (is_device_in_use(config, child[c], force, replacing, - B_FALSE)) + if (is_device_in_use(zhp, config, child[c], force, + replacing, B_FALSE)) anyinuse = B_TRUE; return (anyinuse); @@ -1706,7 +1729,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, return (NULL); } - if (!flags.dryrun && make_disks(zhp, newroot) != 0) { + if (!flags.dryrun && make_disks(zhp, props, newroot) != 0) { nvlist_free(newroot); return (NULL); } @@ -1775,7 +1798,8 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, * uses (such as a dedicated dump device) that even '-f' cannot * override. */ - if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) { + if (is_device_in_use(zhp, poolconfig, newroot, force, replacing, + B_FALSE)) { nvlist_free(newroot); return (NULL); } @@ -1793,7 +1817,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, /* * Run through the vdev specification and label any whole disks found. */ - if (!dryrun && make_disks(zhp, newroot) != 0) { + if (!dryrun && make_disks(zhp, props, newroot) != 0) { nvlist_free(newroot); return (NULL); } diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 13b25a695639..453d4ff7e862 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -224,6 +224,7 @@ typedef enum { ZPOOL_PROP_TNAME, ZPOOL_PROP_MAXDNODESIZE, ZPOOL_PROP_MULTIHOST, + ZPOOL_PROP_PARTITION, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -858,6 +859,11 @@ typedef enum zpool_errata { ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY, } zpool_errata_t; +enum zpool_part { + ZPOOL_PARTITION_LEGACY = 0, + ZPOOL_PARTITION_RAW, +}; + /* * Vdev statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index d3363809d2b1..6c04751f4964 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2020,8 +2020,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) * spare; but FALSE if its an INUSE spare. */ static nvlist_t * -vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, - boolean_t *l2cache, boolean_t *log) +vdev_to_nvlist_iter(zpool_handle_t *zhp, nvlist_t *nv, nvlist_t *search, + boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { uint_t c, children; nvlist_t **child; @@ -2060,8 +2060,8 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* * Search for the requested value. Special cases: * - * - ZPOOL_CONFIG_PATH for whole disk entries. These end in - * "-part1", or "p1". The suffix is hidden from the user, + * - ZPOOL_CONFIG_PATH for legacy whole disk entries. These end + * in "-part1", or "p1". The suffix is hidden from the user, * but included in the string, so this matches around it. * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname() * is used to check all possible expanded paths. @@ -2071,10 +2071,17 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, */ if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) { uint64_t wholedisk = 0; + uint64_t part = ZPOOL_PARTITION_LEGACY; + boolean_t append; + if (zhp) + part = zpool_get_prop_int(zhp, + ZPOOL_PROP_PARTITION, NULL); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); - if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0) + + append = wholedisk && (part == ZPOOL_PARTITION_LEGACY); + if (zfs_strcmp_pathname(srchval, val, append) == 0) return (nv); } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { @@ -2142,7 +2149,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, return (NULL); for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, + if ((ret = vdev_to_nvlist_iter(zhp, child[c], search, avail_spare, l2cache, NULL)) != NULL) { /* * The 'is_log' value is only set for the toplevel @@ -2163,7 +2170,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, + if ((ret = vdev_to_nvlist_iter(zhp, child[c], search, avail_spare, l2cache, NULL)) != NULL) { *avail_spare = B_TRUE; return (ret); @@ -2174,7 +2181,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, + if ((ret = vdev_to_nvlist_iter(zhp, child[c], search, avail_spare, l2cache, NULL)) != NULL) { *l2cache = B_TRUE; return (ret); @@ -2205,7 +2212,8 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; - ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + ret = vdev_to_nvlist_iter(zhp, nvroot, search, avail_spare, l2cache, + log); nvlist_free(search); return (ret); @@ -2249,7 +2257,8 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; - ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + ret = vdev_to_nvlist_iter(zhp, nvroot, search, avail_spare, l2cache, + log); nvlist_free(search); return (ret); diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index bc51a76a8ecb..c8b708203f7e 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1216,10 +1216,10 @@ zfs_resolve_shortname(const char *name, char *path, size_t len) * is done by checking all prefix expansions using either the default * 'zpool_default_import_paths' or the ZPOOL_IMPORT_PATH environment * variable. Proper partition suffixes will be appended if this is a - * whole disk. When a match is found 0 is returned otherwise ENOENT. + * legacy whole disk. When a match is found 0 is returned otherwise ENOENT. */ static int -zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk) +zfs_strcmp_shortname(char *name, char *cmp_name, int append) { int path_len, cmp_len, i = 0, error = ENOENT; char *dir, *env, *envdup = NULL; @@ -1241,7 +1241,7 @@ zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk) dir[strlen(dir)-1] = '\0'; path_len = snprintf(path_name, MAXPATHLEN, "%s/%s", dir, name); - if (wholedisk) + if (append) path_len = zfs_append_partition(path_name, MAXPATHLEN); if ((path_len == cmp_len) && strcmp(path_name, cmp_name) == 0) { @@ -1270,7 +1270,7 @@ zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk) * purposes and redundant slashes stripped to ensure an accurate match. */ int -zfs_strcmp_pathname(char *name, char *cmp, int wholedisk) +zfs_strcmp_pathname(char *name, char *cmp, int append) { int path_len, cmp_len; char path_name[MAXPATHLEN]; @@ -1289,13 +1289,13 @@ zfs_strcmp_pathname(char *name, char *cmp, int wholedisk) free(dup); if (name[0] != '/') - return (zfs_strcmp_shortname(name, cmp_name, wholedisk)); + return (zfs_strcmp_shortname(name, cmp_name, append)); (void) strlcpy(path_name, name, MAXPATHLEN); path_len = strlen(path_name); cmp_len = strlen(cmp_name); - if (wholedisk) { + if (append) { path_len = zfs_append_partition(path_name, MAXPATHLEN); if (path_len == -1) return (ENOMEM); diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index fd21f31176a5..8936e2a2aaac 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -64,6 +64,12 @@ zpool_prop_init(void) { NULL } }; + static zprop_index_t part_table[] = { + { "legacy", ZPOOL_PARTITION_LEGACY }, + { "raw", ZPOOL_PARTITION_RAW }, + { NULL } + }; + /* string properties */ zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT, ZFS_TYPE_POOL, "", "ALTROOT"); @@ -128,6 +134,10 @@ zpool_prop_init(void) zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL, "wait | continue | panic", "FAILMODE", failuremode_table); + zprop_register_index(ZPOOL_PROP_PARTITION, "partition", + ZPOOL_PARTITION_LEGACY, PROP_ONETIME, ZFS_TYPE_POOL, + "legacy | raw", "PART", part_table); + /* hidden properties */ zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, From ea3e2ba4e14d6c7de54a47d00dad7715db4b0261 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Mon, 14 Aug 2017 14:28:43 -0700 Subject: [PATCH 2/2] Enforce PROP_ONETIME on zpool properties Signed-off-by: Chunwei Chen --- include/sys/fs/zfs.h | 1 + lib/libzfs/libzfs_pool.c | 17 ++++++++--------- module/zcommon/zpool_prop.c | 6 ++++++ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 453d4ff7e862..33c59dbc55e5 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -300,6 +300,7 @@ const char *zpool_prop_to_name(zpool_prop_t); const char *zpool_prop_default_string(zpool_prop_t); uint64_t zpool_prop_default_numeric(zpool_prop_t); boolean_t zpool_prop_readonly(zpool_prop_t); +boolean_t zpool_prop_setonce(zpool_prop_t); boolean_t zpool_prop_feature(const char *); boolean_t zpool_prop_unsupported(const char *); int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 6c04751f4964..a85a4c95b76a 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -512,6 +512,14 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, goto error; } + if (!flags.create && zpool_prop_setonce(prop)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' can only be set at " + "creation time"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops, &strval, &intval, errbuf) != 0) goto error; @@ -668,15 +676,6 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, goto error; } break; - case ZPOOL_PROP_TNAME: - if (!flags.create) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' can only be set at " - "creation time"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; case ZPOOL_PROP_MULTIHOST: if (get_system_hostid() == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 8936e2a2aaac..29995ba427e2 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -181,6 +181,12 @@ zpool_prop_readonly(zpool_prop_t prop) return (zpool_prop_table[prop].pd_attr == PROP_READONLY); } +boolean_t +zpool_prop_setonce(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_attr == PROP_ONETIME); +} + const char * zpool_prop_default_string(zpool_prop_t prop) {