Skip to content

Commit

Permalink
zpool import progress reports for long imports
Browse files Browse the repository at this point in the history
When an import requires a long MMP activity check, or when the user
requests pool recovery, the import make take a long time.  The user may
not know why, or be able to tell whether the import is progressing or is
hung.

Add a new option to "zpool import", "-v" (for verbose).

When the -v option is used, create an additional thread which
periodically checks the contents of kstat/zfs/import_progress.  If the
import does not finish quickly, it reports the following on stdout:
* Name and guid of pool being imported
* User-friendly version of spa_load_state
* Expected time import will complete
* the max txg if recovering the pool

If one or more of these values change, the thread prints a new record.

A new record will be printed to stdout with a maximum frequency of one
per second so as not to spam the user.  As a result the printed output
may reflect only some of the import states transitioned through.

Sample output:

	Pool tank1 (guid 4591991398949870326):
	Checking for a remote import.
	Check will complete by Tue Apr 16 08:43:58 2019

	Pool tank1 (guid 4591991398949870326):
	Checking for a remote import.
	Check will complete by Tue Apr 16 08:44:43 2019
	Recovering Pool max txg 745

Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
  • Loading branch information
ofaaland committed Jun 7, 2021
1 parent e5e76bd commit 986d678
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 8 deletions.
162 changes: 161 additions & 1 deletion cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3398,6 +3398,133 @@ zpool_do_checkpoint(int argc, char **argv)

#define CHECKPOINT_OPT 1024

/*
* User-friendly strings for spa_load_state_t
*/
static char *load_state_names[] = {
[SPA_LOAD_NONE] = "Import Complete",
[SPA_LOAD_OPEN] = "Importing",
[SPA_LOAD_IMPORT] = "Importing",
[SPA_LOAD_TRYIMPORT] = "Verifying Configuration",
[SPA_LOAD_RECOVER] = "Recovering Pool",
[SPA_LOAD_ERROR] = "Import Error",
[SPA_LOAD_CREATE] = "Importing"
};

typedef struct import_progress_args {
boolean_t import_done;
} import_progress_args_t;

/*
* SILENT_SECS is amount of time we think user will wait before
* thinking something is broken. SLEEP_USEC is time between
* checks to see if import is done or state changed.
*/
#define IMPORT_PROGRESS_SILENT_SECS 2
#define IMPORT_PROGRESS_PERIOD_SECS 1
#define IMPORT_PROGRESS_SLEEP_USEC 100*1000

static void *
report_import_progress_thread(void *arg)
{
char path[] = "/proc/spl/kstat/zfs/import_progress";
import_progress_args_t *pa = arg;
time_t start_time = time(NULL);
time_t last_report = 0;
unsigned long long last_pool_guid = 0;
unsigned long long last_multihost_secs_time = 0;
unsigned long long last_max_txg = 0;

ASSERT(arg);

while (pa->import_done == B_FALSE) {
FILE *import_kstat;
char *pool_name;
int count;
unsigned long long load_state;
unsigned long long pool_guid;
unsigned long long multihost_secs;
unsigned long long max_txg;
char mmp_status_buf[100];
char pool_rewind_buf[100];

if ((time(NULL) - start_time) < IMPORT_PROGRESS_SILENT_SECS)
continue;

if ((time(NULL) - last_report) < IMPORT_PROGRESS_PERIOD_SECS)
continue;

last_report = time(NULL);

import_kstat = fopen(path, "r");
if (import_kstat == NULL) {
fprintf(stderr, "%s is not available to monitor the "
"progress of the import. However the import is "
"continuing.\n", path);
break;
}

/* Skip header line */
for (int c = fgetc(import_kstat); c != EOF && c != '\n';
c = fgetc(import_kstat))
;

count = fscanf(import_kstat, "%llu %llu %llu %llu %ms\n",
&pool_guid, &load_state, &multihost_secs,
&max_txg, &pool_name);

if (count == 5) {
time_t multihost_secs_time = time(NULL) +
multihost_secs;

mmp_status_buf[0] = '\0';
pool_rewind_buf[0] = '\0';

if (pool_guid != last_pool_guid ||
multihost_secs_time != last_multihost_secs_time)
snprintf(mmp_status_buf,
sizeof (mmp_status_buf), "Multihost is "
"checking for a remote import.\nCheck will "
"complete by %s",
ctime(&multihost_secs_time));

if ((pool_guid != last_pool_guid ||
max_txg != last_max_txg) &&
max_txg != 0) {
snprintf(pool_rewind_buf,
sizeof (pool_rewind_buf), "%s rewind txg "
"%llu\n", load_state_names[load_state],
max_txg);
}

if (mmp_status_buf[0] != '\0' ||
pool_rewind_buf[0] != '\0') {
printf("Pool %s (guid %llu):\n%s%s\n",
pool_name, pool_guid, mmp_status_buf,
pool_rewind_buf);
}

last_pool_guid = pool_guid;
last_multihost_secs_time = multihost_secs_time;
last_max_txg = max_txg;

if (pool_name)
free(pool_name);
} else {
fprintf(stderr, "Unable to parse %s to monitor the "
"progress of the import. However the import is "
"continuing.\n", path);
//fclose(import_kstat);
//break;
}

fclose(import_kstat);
usleep(IMPORT_PROGRESS_SLEEP_USEC);
}

return (NULL);
}

/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
Expand Down Expand Up @@ -3450,6 +3577,9 @@ zpool_do_checkpoint(int argc, char **argv)
* --rewind-to-checkpoint
* Import the pool and revert back to the checkpoint.
*
* -v Report import progress periodically, for slow imports.
*
*
* The import command scans for pools to import, and import pools based on pool
* name and GUID. The pool can also be renamed as part of the import process.
*/
Expand All @@ -3476,20 +3606,24 @@ zpool_do_import(int argc, char **argv)
boolean_t do_rewind = B_FALSE;
boolean_t xtreme_rewind = B_FALSE;
boolean_t do_scan = B_FALSE;
boolean_t do_verbose = B_FALSE;
boolean_t pool_exists = B_FALSE;
boolean_t pool_specified = B_FALSE;
uint64_t txg = -1ULL;
char *cachefile = NULL;
importargs_t idata = { 0 };
char *endptr;

pthread_t tid;
import_progress_args_t pa = { B_FALSE };

struct option long_options[] = {
{"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT},
{0, 0, 0, 0}
};

/* check options */
while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX",
while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:vVX",
long_options, NULL)) != -1) {
switch (c) {
case 'a':
Expand Down Expand Up @@ -3563,6 +3697,9 @@ zpool_do_import(int argc, char **argv)
}
rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND;
break;
case 'v':
do_verbose = B_TRUE;
break;
case 'V':
flags |= ZFS_IMPORT_VERBATIM;
break;
Expand Down Expand Up @@ -3711,6 +3848,15 @@ zpool_do_import(int argc, char **argv)
idata.scan = do_scan;
idata.policy = policy;

if (do_verbose) {
int pthread_err;
pthread_err = pthread_create(&tid, NULL,
report_import_progress_thread, &pa);
if (err)
fprintf(stderr, "Unable to report progress: "
"err %d. Import will continue.\n", pthread_err);
}

pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);

if (pools != NULL && pool_exists &&
Expand Down Expand Up @@ -3773,6 +3919,20 @@ zpool_do_import(int argc, char **argv)
do_all, &idata);
}

if (do_verbose) {
pa.import_done = B_TRUE;
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
}

/*
* If we were just looking for pools, report an error if none were
* found.
*/
if (argc == 0)
(void) fprintf(stderr,
gettext("no pools available to import\n"));

error:
nvlist_free(props);
nvlist_free(pools);
Expand Down
20 changes: 17 additions & 3 deletions tests/zfs-tests/tests/functional/mmp/mmp.kshlib
Original file line number Diff line number Diff line change
Expand Up @@ -221,21 +221,35 @@ check (${SECONDS}s gt $max_duration)"
return $rc
}

function import_activity_check # pool opts act_test_duration
function import_activity_check # pool opts verbosity act_test_duration
{
typeset pool=$1
typeset opts=$2
typeset min_duration=${3:-$MMP_TEST_DURATION_DEFAULT}
typeset verbose=$3
typeset min_duration=${4:-$MMP_TEST_DURATION_DEFAULT}
typeset tmpfile=$(mktemp)

if [[ $verbose = "verbose" ]]; then
opts="$opts -v"
fi

SECONDS=0
zpool import $opts $pool
zpool import $opts $pool | tee $tmpfile
typeset rc=$?

if [[ $SECONDS -le $min_duration ]]; then
log_fail "ERROR: import_activity_check expected activity check \
(${SECONDS}s le min_duration $min_duration)"
fi

if [[ $verbose = "verbose" ]]; then
if ! grep "Multihost is checking" $tmpfile; then
log_fail "ERROR: import_activity_check expected \
activity check completion time"
fi
fi

rm $tmpfile
return $rc
}

Expand Down
2 changes: 1 addition & 1 deletion tests/zfs-tests/tests/functional/mmp/mmp_active_import.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ MMP_IMPORTED_MSG="pool was previously in use from another system."
log_must try_pool_import $MMP_POOL "-d $MMP_DIR" "$MMP_IMPORTED_MSG"

# 7. Verify 'zpool import -f $MMP_POOL' can now import the pool.
log_must import_activity_check $MMP_POOL "-f -d $MMP_DIR"
log_must import_activity_check $MMP_POOL "-f -d $MMP_DIR" "not_verbose"

# 8 Verify pool may be exported/imported without -f argument.
log_must zpool export $MMP_POOL
Expand Down
6 changes: 3 additions & 3 deletions tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ done
log_must zpool export -F $TESTPOOL
log_must mmp_clear_hostid
log_must mmp_set_hostid $HOSTID2
log_mustnot import_activity_check $TESTPOOL ""
log_must import_activity_check $TESTPOOL "-f"
log_mustnot import_activity_check $TESTPOOL "" not_verbose
log_must import_activity_check $TESTPOOL "-f" verbose

# 7. Verify mmp_write and mmp_fail are set correctly
log_must zpool export -F $TESTPOOL
Expand All @@ -106,6 +106,6 @@ log_mustnot import_no_activity_check $TESTPOOL "-f"
log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN
log_must mmp_clear_hostid
log_must mmp_set_hostid $HOSTID1
log_must import_activity_check $TESTPOOL "-f" $MMP_TEST_DURATION_DEFAULT
log_must import_activity_check $TESTPOOL "-f" not_verbose $MMP_TEST_DURATION_DEFAULT

log_pass "multihost=on|off inactive pool activity checks passed"

0 comments on commit 986d678

Please sign in to comment.