Skip to content

Commit

Permalink
zfs: add zpool ddtload subcommand
Browse files Browse the repository at this point in the history
Implement an ioctl which interfaces with a DDT adapter callback for
loading all entries of a given DDT object type, and calls it for every
DDT object that exists in a pool.

Implement the ZAP adapter callback by prefetching the entire zap object.

This subcommand lets users pre-warm (or re-warm) the cache of DDT entries
after a reboot or any other export/import cycle, so that normal write I/O
performance is restored to the pool without waiting for the entries to be
loaded on demand.

Signed-off-by:	Will Andrews <will@firepipe.net>

Conflicts:
	cmd/zpool/zpool_main.c
	include/libzfs_core.h
	include/sys/fs/zfs.h
  • Loading branch information
wca authored and Bryant G. Ly committed Oct 15, 2019
1 parent 79416f1 commit e8a77a0
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 4 deletions.
45 changes: 45 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ static int zpool_do_remove(int, char **);
static int zpool_do_labelclear(int, char **);

static int zpool_do_checkpoint(int, char **);
static int zpool_do_ddtload(int, char **);

static int zpool_do_list(int, char **);
static int zpool_do_iostat(int, char **);
Expand Down Expand Up @@ -143,6 +144,7 @@ typedef enum {
HELP_CLEAR,
HELP_CREATE,
HELP_CHECKPOINT,
HELP_DDTLOAD,
HELP_DESTROY,
HELP_DETACH,
HELP_EXPORT,
Expand Down Expand Up @@ -278,6 +280,7 @@ static zpool_command_t command_table[] = {
{ "labelclear", zpool_do_labelclear, HELP_LABELCLEAR },
{ NULL },
{ "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT },
{ "ddtload", zpool_do_ddtload, HELP_DDTLOAD },
{ NULL },
{ "list", zpool_do_list, HELP_LIST },
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
Expand Down Expand Up @@ -338,6 +341,8 @@ get_usage(zpool_help_t idx)
"\t [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
case HELP_CHECKPOINT:
return (gettext("\tcheckpoint [--discard] <pool> ...\n"));
case HELP_DDTLOAD:
return (gettext("\tddtload <pool>\n"));
case HELP_DESTROY:
return (gettext("\tdestroy [-f] <pool>\n"));
case HELP_DETACH:
Expand Down Expand Up @@ -2940,6 +2945,46 @@ zpool_do_checkpoint(int argc, char **argv)

#define CHECKPOINT_OPT 1024

/*
 * zpool ddtload <pool>
 *
 * Requests that the dedup table (DDT) of the named pool be loaded.
 *
 * Exactly one operand, the pool name, is accepted; any other operand count
 * prints a diagnostic and the usage message.  Returns 1 if the pool cannot
 * be opened, otherwise the value returned by zpool_ddtload().
 */
int
zpool_do_ddtload(int argc, char **argv)
{
	zpool_handle_t *handle;
	char *name;
	int rc;

	/* Step past the subcommand name so argv[0] is the first operand. */
	argv++;
	argc--;

	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool argument\n"));
		usage(B_FALSE);
	} else if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	name = argv[0];
	handle = zpool_open(g_zfs, name);

	if (handle == NULL) {
		/*
		 * A '/' in the name means the user handed us a dataset
		 * rather than a pool, so say so explicitly.
		 */
		if (strchr(name, '/') != NULL) {
			(void) fprintf(stderr, gettext("'zpool ddtload' "
			    "doesn't work on datasets.\n"));
		}
		return (1);
	}

	rc = zpool_ddtload(handle);
	zpool_close(handle);

	return (rc);
}

/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
Expand Down
1 change: 1 addition & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
nvlist_t *);
extern int zpool_checkpoint(zpool_handle_t *);
extern int zpool_discard_checkpoint(zpool_handle_t *);
extern int zpool_ddtload(zpool_handle_t *);

/*
* Basic handle manipulations. These functions do not create or destroy the
Expand Down
2 changes: 2 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ int lzc_reopen(const char *, boolean_t);
int lzc_pool_checkpoint(const char *);
int lzc_pool_checkpoint_discard(const char *);

int lzc_pool_ddtload(const char *);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ typedef struct ddt_ops {
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
void (*ddt_op_loadall)(objset_t *os, uint64_t object);
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
ddt_entry_t *dde);
Expand Down Expand Up @@ -229,6 +230,7 @@ extern void ddt_exit(ddt_t *ddt);
extern void ddt_init(void);
extern void ddt_fini(void);
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
extern void ddt_loadall(ddt_t *ddt);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);

Expand Down
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1272,6 +1272,7 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_DISCARD_CHECKPOINT, /* 0x5a4e */
ZFS_IOC_POOL_INITIALIZE, /* 0x5a4f */
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
ZFS_IOC_POOL_DDTLOAD, /* 0x5a51 */

/*
* Linux - 3/64 numbers reserved.
Expand Down
1 change: 1 addition & 0 deletions include/sys/zap.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints);

Expand Down
21 changes: 21 additions & 0 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1469,6 +1469,27 @@ zpool_discard_checkpoint(zpool_handle_t *zhp)
return (0);
}

/*
 * Ask the kernel to load the dedup table (DDT) of the given pool.
 *
 * Returns 0 on success.  On failure the error is reported through
 * zpool_standard_error() with a "cannot load DDT" message and -1 is
 * returned.
 */
int
zpool_ddtload(zpool_handle_t *zhp)
{
	char errbuf[1024];
	int rc = lzc_pool_ddtload(zhp->zpool_name);

	if (rc == 0)
		return (0);

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "cannot load DDT in '%s'"), zhp->zpool_name);
	(void) zpool_standard_error(zhp->zpool_hdl, rc, errbuf);

	return (-1);
}

/*
* Add the given vdevs to the pool. The caller must have already performed the
* necessary verification to ensure that the vdev specification is well-formed.
Expand Down
18 changes: 18 additions & 0 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,24 @@ lzc_pool_checkpoint_discard(const char *pool)
return (error);
}

/*
 * Issue ZFS_IOC_POOL_DDTLOAD against the named pool to load its dedup
 * table (DDT).
 *
 * The ioctl is sent with an empty argument nvlist, and any result nvlist is
 * discarded.  Returns whatever lzc_ioctl() returned.
 */
int
lzc_pool_ddtload(const char *pool)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *outnvl = NULL;
	int rc = lzc_ioctl(ZFS_IOC_POOL_DDTLOAD, pool, innvl, &outnvl);

	fnvlist_free(outnvl);
	fnvlist_free(innvl);

	return (rc);
}

/*
* Executes a read-only channel program.
*
Expand Down
11 changes: 11 additions & 0 deletions man/man8/zpool.8
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@
.Op Fl R Ar root
.Ar pool vdev Ns ...
.Nm
.Cm ddtload
.Ar pool
.Nm
.Cm destroy
.Op Fl f
.Ar pool
Expand Down Expand Up @@ -1239,6 +1242,14 @@ block devices.
.El
.It Xo
.Nm
.Cm ddtload
.Ar pool
.Xc
Loads the DDT for the given pool, enabling improved write I/O performance
within the bounds of ARC.
.It Xo
.Nm
.Cm destroy
.Op Fl f
.Ar pool
Expand Down
27 changes: 27 additions & 0 deletions module/zfs/ddt.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,15 @@ ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
}

/*
 * Invoke the type-specific "load all" callback on the DDT object for the
 * given type/class pair.  Does nothing when that object does not exist.
 */
static void
ddt_object_loadall(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
{
	if (!ddt_object_exists(ddt, type, class))
		return;

	ddt_ops[type]->ddt_op_loadall(ddt->ddt_os,
	    ddt->ddt_object[type][class]);
}

static int
ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
ddt_entry_t *dde)
Expand Down Expand Up @@ -731,6 +740,24 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
ddt_free(dde);
}

/*
 * Walk every (type, class) combination of the given DDT and ask each
 * existing object to load all of its entries.
 *
 * The intent is a prefetch of all such blocks.  No lock is taken here, for
 * the same reason that ddt_prefetch does not take one.
 */
void
ddt_loadall(ddt_t *ddt)
{
	for (enum ddt_type t = 0; t < DDT_TYPES; t++) {
		for (enum ddt_class c = 0; c < DDT_CLASSES; c++)
			ddt_object_loadall(ddt, t, c);
	}
}

ddt_entry_t *
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
{
Expand Down
16 changes: 12 additions & 4 deletions module/zfs/ddt_zap.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,23 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
return (zap_destroy(os, object, tx));
}

/*
 * ZAP implementation of the ddt_op_loadall callback: prefetch the whole
 * ZAP object.  The result of the prefetch request is deliberately ignored.
 */
static void
ddt_zap_loadall(objset_t *os, uint64_t object)
{
	(void) zap_prefetch_object(os, object);
}

static int
ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
{
uchar_t *cbuf;
uint64_t one, csize;
int error;
uint64_t one, csize;

cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_SLEEP);

error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
error = zap_length_uint64(os, object, (const uint64_t *)&dde->dde_key,
DDT_KEY_WORDS, &one, &csize);
if (error)
goto out;
Expand All @@ -78,9 +85,9 @@ ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
goto out;

ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys));

out:
kmem_free(cbuf, sizeof (dde->dde_phys) + 1);

return (error);
}

Expand Down Expand Up @@ -159,6 +166,7 @@ const ddt_ops_t ddt_zap_ops = {
"zap",
ddt_zap_create,
ddt_zap_destroy,
ddt_zap_loadall,
ddt_zap_lookup,
ddt_zap_prefetch,
ddt_zap_update,
Expand Down
17 changes: 17 additions & 0 deletions module/zfs/zap_micro.c
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,22 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
return (err);
}

/*
 * Issue a prefetch covering an entire ZAP object, from offset 0 through
 * the object's doi_max_offset, at level 0.
 *
 * Returns the error from dmu_object_info() if the object cannot be
 * examined, EINVAL if the object's type is not byteswapped as a ZAP, and
 * 0 once the prefetch has been issued.
 */
int
zap_prefetch_object(objset_t *os, uint64_t zapobj)
{
	dmu_object_info_t info;
	int err = dmu_object_info(os, zapobj, &info);

	if (err != 0)
		return (err);

	/* Refuse anything that is not a ZAP object. */
	if (DMU_OT_BYTESWAP(info.doi_type) != DMU_BSWAP_ZAP)
		return (SET_ERROR(EINVAL));

	dmu_prefetch(os, zapobj, /*level*/ 0, /*offset*/ 0,
	    info.doi_max_offset, ZIO_PRIORITY_SYNC_READ);

	return (0);
}

int
zap_lookup_by_dnode(dnode_t *dn, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf)
Expand Down Expand Up @@ -1665,6 +1681,7 @@ EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch);
EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_prefetch_object);
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64);
Expand Down
34 changes: 34 additions & 0 deletions module/zfs/zfs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -3829,6 +3829,34 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
return (spa_checkpoint_discard(poolname));
}

/*
 * nvlist key table for the pool ddtload ioctl.  The table is empty because
 * the ioctl defines no keys.
 *
 * innvl: unused
 * outnvl: empty
 */
static const zfs_ioc_key_t zfs_keys_pool_ddtload[] = {
	/* no nvl keys */
};

/*
 * ioctl handler: open the named pool and call ddt_loadall() on the DDT
 * kept for each checksum function.  Neither nvlist is consulted.
 *
 * Returns the error from spa_open() if the pool cannot be opened,
 * otherwise 0.
 */
/* ARGSUSED */
static int
zfs_ioc_pool_ddtload(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	spa_t *pool;
	int rc = spa_open(poolname, &pool, FTAG);

	if (rc == 0) {
		enum zio_checksum cksum;

		for (cksum = 0; cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++)
			ddt_loadall(pool->spa_ddt[cksum]);

		spa_close(pool, FTAG);
	}

	return (rc);
}

/*
* inputs:
* zc_name name of dataset to destroy
Expand Down Expand Up @@ -6660,6 +6688,12 @@ zfs_ioctl_init(void)
zfs_keys_pool_discard_checkpoint,
ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));

zfs_ioctl_register("zpool_ddtload",
ZFS_IOC_POOL_DDTLOAD, zfs_ioc_pool_ddtload,
zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
zfs_keys_pool_ddtload, ARRAY_SIZE(zfs_keys_pool_ddtload));

zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
Expand Down

0 comments on commit e8a77a0

Please sign in to comment.