Skip to content

Commit

Permalink
Add pool prop partition to disable auto partition
Browse files Browse the repository at this point in the history
zfsonlinux always partition disk when it detects the device given is a
whole disk. This legacy behavior from Illumos, however, has no apparent
benefit on Linux, but has some down sides besides confusion. E.g.
autoexpand, switching to dm device requires partprobe.

We add a pool property `partition` to be set during pool create. It
currently has two values, legacy and raw. When setting it to legacy, it
will behave as it did. When setiing it to raw, it will always use the
device as is without partitioning even if it's a whole disk.

This property applies to all commands that add disks to pool, so zpool
add/attach/replace will partition or not partition based on the property
on the target pool.

A pool without this property will be treated as legacy. Newly created
pool will by default have partition=raw.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
  • Loading branch information
Chunwei Chen committed Jun 28, 2017
1 parent 2d678f7 commit 646aab0
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 35 deletions.
5 changes: 5 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,11 @@ zpool_do_create(int argc, char **argv)
}
}

/* Make new pools default to partition=raw */
if (add_prop_list_default(zpool_prop_to_name(ZPOOL_PROP_PARTITION),
"raw", &props, B_TRUE))
goto errout;

argc -= optind;
argv += optind;

Expand Down
63 changes: 44 additions & 19 deletions cmd/zpool/zpool_vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1188,29 +1188,47 @@ zero_label(char *path)
* need to get the devid after we label the disk.
*/
static int
make_disks(zpool_handle_t *zhp, nvlist_t *nv)
make_disks(zpool_handle_t *zhp, nvlist_t *props, nvlist_t *nv)
{
nvlist_t **child;
uint_t c, children;
char *type, *path;
char devpath[MAXPATHLEN];
char udevpath[MAXPATHLEN];
uint64_t wholedisk;
uint64_t part = ZPOOL_PARTITION_LEGACY;
struct stat64 statbuf;
int is_exclusive = 0;
int fd;
int ret;

verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

/*
* Find out the partition type. If zhp exists, this is an existing
* pool, get it from pool property. Otherwise, this is pool creation,
* get it from cmdline from props.
*/
if (zhp)
part = zpool_get_prop_int(zhp, ZPOOL_PROP_PARTITION, NULL);
else if (props) {
char *strval;
uint64_t index;
if (nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_PARTITION), &strval) == 0 &&
zpool_prop_string_to_index(ZPOOL_PROP_PARTITION, strval,
&index) == 0)
part = index;
}

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {

if (strcmp(type, VDEV_TYPE_DISK) != 0)
return (0);

/*
* We have a disk device. If this is a whole disk write
* We have a disk device. If this is a legacy whole disk write
* out the efi partition table, otherwise write zero's to
* the first 4k of the partition. This is to ensure that
* libblkid will not misidentify the partition due to a
Expand All @@ -1220,14 +1238,15 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
&wholedisk));

if (!wholedisk) {
if (!wholedisk || part == ZPOOL_PARTITION_RAW) {
/*
* Update device id string for mpath nodes (Linux only)
*/
if (is_mpath_whole_disk(path))
if (wholedisk || is_mpath_whole_disk(path))
update_vdev_config_dev_strs(nv);

(void) zero_label(path);
if (!is_spare(NULL, path))
(void) zero_label(path);
return (0);
}

Expand Down Expand Up @@ -1318,19 +1337,19 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
}

for (c = 0; c < children; c++)
if ((ret = make_disks(zhp, child[c])) != 0)
if ((ret = make_disks(zhp, props, child[c])) != 0)
return (ret);

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = make_disks(zhp, child[c])) != 0)
if ((ret = make_disks(zhp, props, child[c])) != 0)
return (ret);

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = make_disks(zhp, child[c])) != 0)
if ((ret = make_disks(zhp, props, child[c])) != 0)
return (ret);

return (0);
Expand All @@ -1341,8 +1360,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
* the majority of this task.
*/
static boolean_t
is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
boolean_t replacing, boolean_t isspare)
is_device_in_use(zpool_handle_t *zhp, nvlist_t *config, nvlist_t *nv,
boolean_t force, boolean_t replacing, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
Expand All @@ -1368,8 +1387,13 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
* regardless of what libblkid or zpool_in_use() says.
*/
if (replacing) {
uint64_t part = ZPOOL_PARTITION_LEGACY;
if (zhp)
part = zpool_get_prop_int(zhp,
ZPOOL_PROP_PARTITION, NULL);

(void) strlcpy(buf, path, sizeof (buf));
if (wholedisk) {
if (wholedisk && part == ZPOOL_PARTITION_LEGACY) {
ret = zfs_append_partition(buf, sizeof (buf));
if (ret == -1)
return (-1);
Expand All @@ -1389,22 +1413,22 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
}

for (c = 0; c < children; c++)
if (is_device_in_use(config, child[c], force, replacing,
if (is_device_in_use(zhp, config, child[c], force, replacing,
B_FALSE))
anyinuse = B_TRUE;

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
if (is_device_in_use(config, child[c], force, replacing,
B_TRUE))
if (is_device_in_use(zhp, config, child[c], force,
replacing, B_TRUE))
anyinuse = B_TRUE;

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
if (is_device_in_use(config, child[c], force, replacing,
B_FALSE))
if (is_device_in_use(zhp, config, child[c], force,
replacing, B_FALSE))
anyinuse = B_TRUE;

return (anyinuse);
Expand Down Expand Up @@ -1705,7 +1729,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
return (NULL);
}

if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
if (!flags.dryrun && make_disks(zhp, props, newroot) != 0) {
nvlist_free(newroot);
return (NULL);
}
Expand Down Expand Up @@ -1774,7 +1798,8 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
* uses (such as a dedicated dump device) that even '-f' cannot
* override.
*/
if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
if (is_device_in_use(zhp, poolconfig, newroot, force, replacing,
B_FALSE)) {
nvlist_free(newroot);
return (NULL);
}
Expand All @@ -1792,7 +1817,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
/*
* Run through the vdev specification and label any whole disks found.
*/
if (!dryrun && make_disks(zhp, newroot) != 0) {
if (!dryrun && make_disks(zhp, props, newroot) != 0) {
nvlist_free(newroot);
return (NULL);
}
Expand Down
6 changes: 6 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ typedef enum {
ZPOOL_PROP_MAXBLOCKSIZE,
ZPOOL_PROP_TNAME,
ZPOOL_PROP_MAXDNODESIZE,
ZPOOL_PROP_PARTITION,
ZPOOL_NUM_PROPS
} zpool_prop_t;

Expand Down Expand Up @@ -819,6 +820,11 @@ typedef enum zpool_errata {
ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY,
} zpool_errata_t;

enum zpool_part {
ZPOOL_PARTITION_LEGACY = 0,
ZPOOL_PARTITION_RAW,
};

/*
* Vdev statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
Expand Down
29 changes: 19 additions & 10 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1949,8 +1949,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
* spare; but FALSE if its an INUSE spare.
*/
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
boolean_t *l2cache, boolean_t *log)
vdev_to_nvlist_iter(zpool_handle_t *zhp, nvlist_t *nv, nvlist_t *search,
boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
{
uint_t c, children;
nvlist_t **child;
Expand Down Expand Up @@ -1989,8 +1989,8 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
/*
* Search for the requested value. Special cases:
*
* - ZPOOL_CONFIG_PATH for whole disk entries. These end in
* "-part1", or "p1". The suffix is hidden from the user,
* - ZPOOL_CONFIG_PATH for legacy whole disk entries. These end
* in "-part1", or "p1". The suffix is hidden from the user,
* but included in the string, so this matches around it.
* - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
* is used to check all possible expanded paths.
Expand All @@ -2000,10 +2000,17 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
*/
if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
uint64_t wholedisk = 0;
uint64_t part = ZPOOL_PARTITION_LEGACY;
boolean_t append;

if (zhp)
part = zpool_get_prop_int(zhp,
ZPOOL_PROP_PARTITION, NULL);
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
&wholedisk);
if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)

append = wholedisk && (part == ZPOOL_PARTITION_LEGACY);
if (zfs_strcmp_pathname(srchval, val, append) == 0)
return (nv);

} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
Expand Down Expand Up @@ -2071,7 +2078,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
return (NULL);

for (c = 0; c < children; c++) {
if ((ret = vdev_to_nvlist_iter(child[c], search,
if ((ret = vdev_to_nvlist_iter(zhp, child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
/*
* The 'is_log' value is only set for the toplevel
Expand All @@ -2092,7 +2099,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
if ((ret = vdev_to_nvlist_iter(child[c], search,
if ((ret = vdev_to_nvlist_iter(zhp, child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
*avail_spare = B_TRUE;
return (ret);
Expand All @@ -2103,7 +2110,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
if ((ret = vdev_to_nvlist_iter(child[c], search,
if ((ret = vdev_to_nvlist_iter(zhp, child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
*l2cache = B_TRUE;
return (ret);
Expand Down Expand Up @@ -2134,7 +2141,8 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
*l2cache = B_FALSE;
if (log != NULL)
*log = B_FALSE;
ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
ret = vdev_to_nvlist_iter(zhp, nvroot, search, avail_spare, l2cache,
log);
nvlist_free(search);

return (ret);
Expand Down Expand Up @@ -2178,7 +2186,8 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
*l2cache = B_FALSE;
if (log != NULL)
*log = B_FALSE;
ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
ret = vdev_to_nvlist_iter(zhp, nvroot, search, avail_spare, l2cache,
log);
nvlist_free(search);

return (ret);
Expand Down
12 changes: 6 additions & 6 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1203,10 +1203,10 @@ zfs_resolve_shortname(const char *name, char *path, size_t len)
* is done by checking all prefix expansions using either the default
* 'zpool_default_import_paths' or the ZPOOL_IMPORT_PATH environment
* variable. Proper partition suffixes will be appended if this is a
* whole disk. When a match is found 0 is returned otherwise ENOENT.
* legacy whole disk. When a match is found 0 is returned otherwise ENOENT.
*/
static int
zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk)
zfs_strcmp_shortname(char *name, char *cmp_name, int append)
{
int path_len, cmp_len, i = 0, error = ENOENT;
char *dir, *env, *envdup = NULL;
Expand All @@ -1228,7 +1228,7 @@ zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk)
dir[strlen(dir)-1] = '\0';

path_len = snprintf(path_name, MAXPATHLEN, "%s/%s", dir, name);
if (wholedisk)
if (append)
path_len = zfs_append_partition(path_name, MAXPATHLEN);

if ((path_len == cmp_len) && strcmp(path_name, cmp_name) == 0) {
Expand Down Expand Up @@ -1257,7 +1257,7 @@ zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk)
* purposes and redundant slashes stripped to ensure an accurate match.
*/
int
zfs_strcmp_pathname(char *name, char *cmp, int wholedisk)
zfs_strcmp_pathname(char *name, char *cmp, int append)
{
int path_len, cmp_len;
char path_name[MAXPATHLEN];
Expand All @@ -1276,13 +1276,13 @@ zfs_strcmp_pathname(char *name, char *cmp, int wholedisk)
free(dup);

if (name[0] != '/')
return (zfs_strcmp_shortname(name, cmp_name, wholedisk));
return (zfs_strcmp_shortname(name, cmp_name, append));

(void) strlcpy(path_name, name, MAXPATHLEN);
path_len = strlen(path_name);
cmp_len = strlen(cmp_name);

if (wholedisk) {
if (append) {
path_len = zfs_append_partition(path_name, MAXPATHLEN);
if (path_len == -1)
return (ENOMEM);
Expand Down
10 changes: 10 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ zpool_prop_init(void)
{ NULL }
};

static zprop_index_t part_table[] = {
{ "legacy", ZPOOL_PARTITION_LEGACY },
{ "raw", ZPOOL_PARTITION_RAW },
{ NULL }
};

/* string properties */
zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
ZFS_TYPE_POOL, "<path>", "ALTROOT");
Expand Down Expand Up @@ -125,6 +131,10 @@ zpool_prop_init(void)
zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL,
"wait | continue | panic", "FAILMODE", failuremode_table);
zprop_register_index(ZPOOL_PROP_PARTITION, "partition",
ZPOOL_PARTITION_LEGACY, PROP_ONETIME, ZFS_TYPE_POOL,
"legacy | raw", "PART", part_table);


/* hidden properties */
zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
Expand Down

0 comments on commit 646aab0

Please sign in to comment.