Skip to content

Commit

Permalink
OpenZFS 8115 - parallel zfs mount
Browse files Browse the repository at this point in the history
Porting Notes:
* Use thread pools (tpool) API instead of introducing taskq interfaces
  to libzfs.
* Use pthread_mutex_t for locks as mutex_t isn't available.
* Ignore alternative libshare initialization since OpenZFS-7955 is
  not present on zfsonlinux.

Authored by: Sebastien Roy <seb@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Prashanth Sreenivasa <pks@delphix.com>
Authored by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Matt Ahrens <mahrens@delphix.com>
Ported-by: Don Brady <don.brady@delphix.com>

OpenZFS-issue: https://www.illumos.org/issues/8115
OpenZFS-commit: openzfs/openzfs@a3f0e2b569
Closes openzfs#8092
  • Loading branch information
Sebastien Roy authored and behlendorf committed Nov 15, 2018
1 parent af2e841 commit a10d50f
Show file tree
Hide file tree
Showing 10 changed files with 716 additions and 139 deletions.
103 changes: 73 additions & 30 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6059,7 +6059,12 @@ zfs_do_holds(int argc, char **argv)

#define CHECK_SPINNER 30
#define SPINNER_TIME 3 /* seconds */
#define MOUNT_TIME 5 /* seconds */
#define MOUNT_TIME 1 /* seconds */

/*
 * State that get_all_datasets() passes to get_one_dataset() through the
 * opaque data pointer of zfs_iter_root().
 */
typedef struct get_all_state {
boolean_t ga_verbose;	/* gates the progress-spinner logic in get_one_dataset() */
get_all_cb_t *ga_cbp;	/* handle list that libzfs_add_handle() appends to */
} get_all_state_t;

static int
get_one_dataset(zfs_handle_t *zhp, void *data)
Expand All @@ -6068,10 +6073,10 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
static int spinval = 0;
static int spincheck = 0;
static time_t last_spin_time = (time_t)0;
get_all_cb_t *cbp = data;
get_all_state_t *state = data;
zfs_type_t type = zfs_get_type(zhp);

if (cbp->cb_verbose) {
if (state->ga_verbose) {
if (--spincheck < 0) {
time_t now = time(NULL);
if (last_spin_time + SPINNER_TIME < now) {
Expand All @@ -6097,25 +6102,23 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
libzfs_add_handle(cbp, zhp);
assert(cbp->cb_used <= cbp->cb_alloc);
libzfs_add_handle(state->ga_cbp, zhp);
assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc);

return (0);
}

static void
get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
get_all_datasets(get_all_cb_t *cbp, boolean_t verbose)
{
get_all_cb_t cb = { 0 };
cb.cb_verbose = verbose;
cb.cb_getone = get_one_dataset;
get_all_state_t state = {
.ga_verbose = verbose,
.ga_cbp = cbp
};

if (verbose)
set_progress_header(gettext("Reading ZFS config"));
(void) zfs_iter_root(g_zfs, get_one_dataset, &cb);

*dslist = cb.cb_handles;
*count = cb.cb_used;
(void) zfs_iter_root(g_zfs, get_one_dataset, &state);

if (verbose)
finish_progress(gettext("done."));
Expand All @@ -6126,8 +6129,19 @@ get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
* similar, we have a common function with an extra parameter to determine which
* mode we are using.
*/
#define OP_SHARE 0x1
#define OP_MOUNT 0x2
typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t;

/*
 * State shared by every invocation of share_mount_one_cb(). The fields
 * above sm_lock are filled in by share_mount() before the callbacks run
 * and are only read by the callback; the fields below sm_lock are updated
 * by the callback while holding sm_lock.
 */
typedef struct share_mount_state {
share_mount_op_t sm_op;	/* operation forwarded to share_mount_one() */
boolean_t sm_verbose;	/* report progress after each filesystem */
int sm_flags;	/* flags forwarded to share_mount_one() */
char *sm_options;	/* options forwarded to share_mount_one() */
char *sm_proto; /* only valid for OP_SHARE */
pthread_mutex_t sm_lock; /* protects the remaining fields */
uint_t sm_total; /* number of filesystems to process */
uint_t sm_done; /* number of filesystems processed */
int sm_status; /* 0, or a non-zero share_mount_one() result if any share/mount operation failed */
} share_mount_state_t;

/*
* Share or mount a dataset.
Expand Down Expand Up @@ -6385,6 +6399,29 @@ report_mount_progress(int current, int total)
update_progress(info);
}

/*
 * Per-filesystem callback handed to zfs_foreach_mountpoint(). Performs the
 * share or mount described by the share_mount_state_t passed in 'arg', then,
 * under sm_lock, records a failure (if any), bumps the completed count and
 * refreshes the progress meter when running verbosely.
 *
 * Returns the result of share_mount_one() for this filesystem.
 */
static int
share_mount_one_cb(zfs_handle_t *zhp, void *arg)
{
	share_mount_state_t *state = arg;
	int err = share_mount_one(zhp, state->sm_op, state->sm_flags,
	    state->sm_proto, B_FALSE, state->sm_options);

	/* The bookkeeping below is shared between callback invocations. */
	pthread_mutex_lock(&state->sm_lock);
	if (err != 0)
		state->sm_status = err;
	state->sm_done++;
	if (state->sm_verbose)
		report_mount_progress(state->sm_done, state->sm_total);
	pthread_mutex_unlock(&state->sm_lock);

	return (err);
}

static void
append_options(char *mntopts, char *newopts)
{
Expand Down Expand Up @@ -6459,8 +6496,6 @@ share_mount(int op, int argc, char **argv)

/* check number of arguments */
if (do_all) {
zfs_handle_t **dslist = NULL;
size_t i, count = 0;
char *protocol = NULL;

if (op == OP_SHARE && argc > 0) {
Expand All @@ -6481,27 +6516,35 @@ share_mount(int op, int argc, char **argv)
}

start_progress_timer();
get_all_datasets(&dslist, &count, verbose);
get_all_cb_t cb = { 0 };
get_all_datasets(&cb, verbose);

if (count == 0) {
if (cb.cb_used == 0) {
if (options != NULL)
free(options);
return (0);
}

qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp);

for (i = 0; i < count; i++) {
if (verbose)
report_mount_progress(i, count);
share_mount_state_t share_mount_state = { 0 };
share_mount_state.sm_op = op;
share_mount_state.sm_verbose = verbose;
share_mount_state.sm_flags = flags;
share_mount_state.sm_options = options;
share_mount_state.sm_proto = protocol;
share_mount_state.sm_total = cb.cb_used;
pthread_mutex_init(&share_mount_state.sm_lock, NULL);

if (share_mount_one(dslist[i], op, flags, protocol,
B_FALSE, options) != 0)
ret = 1;
zfs_close(dslist[i]);
}
/*
* libshare isn't mt-safe, so only do the operation in parallel
* if we're mounting.
*/
zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
share_mount_one_cb, &share_mount_state, op == OP_MOUNT);
ret = share_mount_state.sm_status;

free(dslist);
for (int i = 0; i < cb.cb_used; i++)
zfs_close(cb.cb_handles[i]);
free(cb.cb_handles);
} else if (argc == 0) {
struct mnttab entry;

Expand Down
5 changes: 2 additions & 3 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -573,12 +573,11 @@ typedef struct get_all_cb {
zfs_handle_t **cb_handles;
size_t cb_alloc;
size_t cb_used;
boolean_t cb_verbose;
int (*cb_getone)(zfs_handle_t *, void *);
} get_all_cb_t;

void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t,
zfs_iter_f, void *, boolean_t);
void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
int libzfs_dataset_cmp(const void *, const void *);

/*
* Functions to create and destroy datasets.
Expand Down
9 changes: 8 additions & 1 deletion include/libzfs_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2018 Datto Inc.
*/

Expand Down Expand Up @@ -60,6 +60,13 @@ struct libzfs_handle {
void *libzfs_sharehdl; /* libshare handle */
uint_t libzfs_shareflags;
boolean_t libzfs_mnttab_enable;
/*
* We need a lock to handle the case where parallel mount
* threads are populating the mnttab cache simultaneously. The
* lock only protects the integrity of the avl tree, and does
* not protect the contents of the mnttab entries themselves.
*/
pthread_mutex_t libzfs_mnttab_cache_lock;
avl_tree_t libzfs_mnttab_cache;
int libzfs_pool_iter;
char libzfs_chassis_id[256];
Expand Down
46 changes: 33 additions & 13 deletions lib/libzfs/libzfs_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,7 @@ libzfs_mnttab_cache_compare(const void *arg1, const void *arg2)
void
libzfs_mnttab_init(libzfs_handle_t *hdl)
{
pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL);
assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0);
avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare,
sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
Expand Down Expand Up @@ -849,6 +850,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl)
free(mtn);
}
avl_destroy(&hdl->libzfs_mnttab_cache);
(void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock);
}

void
Expand All @@ -863,7 +865,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
{
mnttab_node_t find;
mnttab_node_t *mtn;
int error;
int ret = ENOENT;

if (!hdl->libzfs_mnttab_enable) {
struct mnttab srch = { 0 };
Expand All @@ -883,17 +885,24 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
return (ENOENT);
}

if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
if ((error = libzfs_mnttab_update(hdl)) != 0)
pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) {
int error;

if ((error = libzfs_mnttab_update(hdl)) != 0) {
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
return (error);
}
}

find.mtn_mt.mnt_special = (char *)fsname;
mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
if (mtn) {
*entry = mtn->mtn_mt;
return (0);
ret = 0;
}
return (ENOENT);
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
return (ret);
}

void
Expand All @@ -902,14 +911,23 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special,
{
mnttab_node_t *mtn;

if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
return;
mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
avl_add(&hdl->libzfs_mnttab_cache, mtn);
pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
/* An empty cache has not been populated yet, so there is nothing to add to. */
if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) {
	mnttab_node_t find;

	/*
	 * Another thread may have already added this entry
	 * via libzfs_mnttab_update. If so we should skip it.
	 *
	 * Probe with a stack key keyed on mnt_special (the same way
	 * libzfs_mnttab_find() and libzfs_mnttab_remove() do) before
	 * allocating anything, so that a duplicate never leaves a node
	 * and its four zfs_strdup()ed strings to be cleaned up.
	 */
	find.mtn_mt.mnt_special = (char *)special;
	if (avl_find(&hdl->libzfs_mnttab_cache, &find, NULL) == NULL) {
		mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
		mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
		mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
		mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
		mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
		avl_add(&hdl->libzfs_mnttab_cache, mtn);
	}
}
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
}

void
Expand All @@ -918,6 +936,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
mnttab_node_t find;
mnttab_node_t *ret;

pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
find.mtn_mt.mnt_special = (char *)fsname;
if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL))
!= NULL) {
Expand All @@ -928,6 +947,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
free(ret->mtn_mt.mnt_mntopts);
free(ret);
}
pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
}

int
Expand Down
Loading

0 comments on commit a10d50f

Please sign in to comment.