Skip to content

Commit

Permalink
zpool import progress reports for long imports
Browse files Browse the repository at this point in the history
When an import requires a long MMP activity check, or when the user
requests pool recovery, the import make take a long time.  The user may
not know why, or be able to tell whether the import is progressing or is
hung.

Add a new option to "zpool import", "-v" (for verbose).

When the -v option is used, create an additional thread which
periodically checks the contents of kstat/zfs/import_progress.  If the
import does not finish quickly, it reports the following on stdout:
* Name and guid of pool being imported
* User-friendly version of spa_load_state
* Expected time import will complete
* the max txg if recovering the pool

If one or more of these values change, the thread prints a new record.

A new record will be printed to stderr with a maximum frequency of one
per second so as not to spam the user.  As a result the printed output
may reflect only some of the import states transitioned through.

Sample output:

	Pool tank1 (guid 4591991398949870326):
	Checking for a remote import.
	Check will complete by Tue Apr 16 08:43:58 2019

	Pool tank1 (guid 4591991398949870326):
	Checking for a remote import.
	Check will complete by Tue Apr 16 08:44:43 2019
	Recovering Pool max txg 745

Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
  • Loading branch information
ofaaland committed Aug 17, 2020
1 parent fc34dfb commit 6555737
Showing 1 changed file with 152 additions and 1 deletion.
153 changes: 152 additions & 1 deletion cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3095,6 +3095,133 @@ zpool_do_checkpoint(int argc, char **argv)

#define CHECKPOINT_OPT 1024

/*
* User-friendly strings for spa_load_state_t
*/
static char *load_state_names[] = {
[SPA_LOAD_NONE] = "Import Complete",
[SPA_LOAD_OPEN] = "Importing",
[SPA_LOAD_IMPORT] = "Importing",
[SPA_LOAD_TRYIMPORT] = "Verifying Configuration",
[SPA_LOAD_RECOVER] = "Recovering Pool",
[SPA_LOAD_ERROR] = "Import Error",
[SPA_LOAD_CREATE] = "Importing"
};

typedef struct import_progress_args {
boolean_t import_done;
} import_progress_args_t;

/*
* SILENT_SECS is amount of time we think user will wait before
* thinking something is broken. SLEEP_USEC is time between
* checks to see if import is done or state changed.
*/
#define IMPORT_PROGRESS_SILENT_SECS 2
#define IMPORT_PROGRESS_PERIOD_SECS 1
#define IMPORT_PROGRESS_SLEEP_USEC 100*1000

static void *
report_import_progress_thread(void *arg)
{
char path[] = "/proc/spl/kstat/zfs/import_progress";
import_progress_args_t *pa = arg;
time_t start_time = time(NULL);
time_t last_report = 0;
unsigned long long last_pool_guid = 0;
unsigned long long last_multihost_secs_time = 0;
unsigned long long last_max_txg = 0;

ASSERT(arg);

while (pa->import_done == B_FALSE) {
FILE *import_kstat;
char *pool_name;
int count;
unsigned long long load_state;
unsigned long long pool_guid;
unsigned long long multihost_secs;
unsigned long long max_txg;
char mmp_status_buf[100];
char pool_rewind_buf[100];

if ((time(NULL) - start_time) < IMPORT_PROGRESS_SILENT_SECS)
continue;

if ((time(NULL) - last_report) < IMPORT_PROGRESS_PERIOD_SECS)
continue;

last_report = time(NULL);

import_kstat = fopen(path, "r");
if (import_kstat == NULL) {
fprintf(stderr, "%s is not available to monitor the "
"progress of the import. However the import is "
"continuing.\n", path);
break;
}

/* Skip header line */
for (int c = fgetc(import_kstat); c != EOF && c != '\n';
c = fgetc(import_kstat))
;

count = fscanf(import_kstat, "%llu %llu %llu %llu %ms\n",
&pool_guid, &load_state, &multihost_secs,
&max_txg, &pool_name);

if (count == 5) {
time_t multihost_secs_time = time(NULL) +
multihost_secs;

mmp_status_buf[0] = '\0';
pool_rewind_buf[0] = '\0';

if (pool_guid != last_pool_guid ||
multihost_secs_time != last_multihost_secs_time)
snprintf(mmp_status_buf,
sizeof (mmp_status_buf), "Multihost is "
"checking for a remote import.\nCheck will "
"complete by %s",
ctime(&multihost_secs_time));

if ((pool_guid != last_pool_guid ||
max_txg != last_max_txg) &&
max_txg != 0) {
snprintf(pool_rewind_buf,
sizeof (pool_rewind_buf), "%s rewind txg "
"%llu\n", load_state_names[load_state],
max_txg);
}

if (mmp_status_buf[0] != '\0' ||
pool_rewind_buf[0] != '\0') {
fprintf(stderr, "Pool %s (guid %llu):\n%s%s\n",
pool_name, pool_guid, mmp_status_buf,
pool_rewind_buf);
}

last_pool_guid = pool_guid;
last_multihost_secs_time = multihost_secs_time;
last_max_txg = max_txg;

if (pool_name)
free(pool_name);
} else {
fprintf(stderr, "Unable to parse %s to monitor the "
"progress of the import. However the import is "
"continuing.\n", path);
//fclose(import_kstat);
//break;
}

fclose(import_kstat);
usleep(IMPORT_PROGRESS_SLEEP_USEC);
}

return (NULL);
}

/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
Expand Down Expand Up @@ -3141,6 +3268,8 @@ zpool_do_checkpoint(int argc, char **argv)
* -s Scan using the default search path, the libblkid cache will
* not be consulted.
*
* -v Report import progress periodically, for slow imports.
*
* --rewind-to-checkpoint
* Import the pool and revert back to the checkpoint.
*
Expand Down Expand Up @@ -3174,19 +3303,23 @@ zpool_do_import(int argc, char **argv)
boolean_t do_rewind = B_FALSE;
boolean_t xtreme_rewind = B_FALSE;
boolean_t do_scan = B_FALSE;
boolean_t do_verbose = B_FALSE;
boolean_t pool_exists = B_FALSE;
uint64_t pool_state, txg = -1ULL;
char *cachefile = NULL;
importargs_t idata = { 0 };
char *endptr;

pthread_t tid;
import_progress_args_t pa = { B_FALSE };

struct option long_options[] = {
{"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT},
{0, 0, 0, 0}
};

/* check options */
while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX",
while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:vVX",
long_options, NULL)) != -1) {
switch (c) {
case 'a':
Expand Down Expand Up @@ -3268,6 +3401,9 @@ zpool_do_import(int argc, char **argv)
}
rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND;
break;
case 'v':
do_verbose = B_TRUE;
break;
case 'V':
flags |= ZFS_IMPORT_VERBATIM;
break;
Expand Down Expand Up @@ -3419,6 +3555,15 @@ zpool_do_import(int argc, char **argv)
idata.scan = do_scan;
idata.policy = policy;

if (do_verbose) {
int pthread_err;
pthread_err = pthread_create(&tid, NULL,
report_import_progress_thread, &pa);
if (err)
fprintf(stderr, "Unable to report progress: "
"err %d. Import will continue.\n", pthread_err);
}

pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);

if (pools != NULL && pool_exists &&
Expand Down Expand Up @@ -3540,6 +3685,12 @@ zpool_do_import(int argc, char **argv)
}
}

if (do_verbose) {
pa.import_done = B_TRUE;
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
}

/*
* If we were just looking for pools, report an error if none were
* found.
Expand Down

0 comments on commit 6555737

Please sign in to comment.