From e8a77a06972cd2458ea5e9ecc8f64e35a685940e Mon Sep 17 00:00:00 2001
From: Will Andrews
Date: Mon, 23 Sep 2019 02:53:49 +0800
Subject: [PATCH] zfs: add zpool ddtload subcommand

Implement an ioctl which interfaces with a DDT adapter callback for loading
all entries of a given DDT object type, and calls it for every DDT object
that exists in a pool.

Implement the ZAP adapter callback by prefetching the entire ZAP object.

This subcommand enables users to pre-warm (or re-warm) the cache for DDT
entries if they reboot or otherwise perform an export/import cycle, and
skip the wait for the entries to be loaded, to restore normal I/O write
performance to the pool.

Signed-off-by: Will Andrews

Conflicts:
	cmd/zpool/zpool_main.c
	include/libzfs_core.h
	include/sys/fs/zfs.h
---
 cmd/zpool/zpool_main.c        | 45 +++++++++++++++++++++++++++++++++++
 include/libzfs.h              |  1 +
 include/libzfs_core.h         |  2 ++
 include/sys/ddt.h             |  2 ++
 include/sys/fs/zfs.h          |  1 +
 include/sys/zap.h             |  1 +
 lib/libzfs/libzfs_pool.c      | 21 ++++++++++++++++
 lib/libzfs_core/libzfs_core.c | 18 ++++++++++++++
 man/man8/zpool.8              | 10 ++++++++
 module/zfs/ddt.c              | 27 +++++++++++++++++++++
 module/zfs/ddt_zap.c          | 16 +++++++++----
 module/zfs/zap_micro.c        | 17 +++++++++++++
 module/zfs/zfs_ioctl.c        | 34 ++++++++++++++++++++++++++
 13 files changed, 191 insertions(+), 4 deletions(-)

diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 4fd5f025b717..327918f699bc 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -81,6 +81,7 @@ static int zpool_do_remove(int, char **);
 static int zpool_do_labelclear(int, char **);
 
 static int zpool_do_checkpoint(int, char **);
+static int zpool_do_ddtload(int, char **);
 
 static int zpool_do_list(int, char **);
 static int zpool_do_iostat(int, char **);
@@ -143,6 +144,7 @@ typedef enum {
 	HELP_CLEAR,
 	HELP_CREATE,
 	HELP_CHECKPOINT,
+	HELP_DDTLOAD,
 	HELP_DESTROY,
 	HELP_DETACH,
 	HELP_EXPORT,
@@ -278,6 +280,7 @@ static zpool_command_t command_table[] = {
 	{ "labelclear",	zpool_do_labelclear,	HELP_LABELCLEAR		},
 	{ NULL },
 	{ "checkpoint",	zpool_do_checkpoint,	HELP_CHECKPOINT		},
+	{ "ddtload",	zpool_do_ddtload,	HELP_DDTLOAD		},
 	{ NULL },
 	{ "list",	zpool_do_list,		HELP_LIST		},
 	{ "iostat",	zpool_do_iostat,	HELP_IOSTAT		},
@@ -338,6 +341,8 @@ get_usage(zpool_help_t idx)
 		    "\t    [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
 	case HELP_CHECKPOINT:
 		return (gettext("\tcheckpoint [--discard] <pool> ...\n"));
+	case HELP_DDTLOAD:
+		return (gettext("\tddtload <pool>\n"));
 	case HELP_DESTROY:
 		return (gettext("\tdestroy [-f] <pool>\n"));
 	case HELP_DETACH:
@@ -2940,6 +2945,46 @@ zpool_do_checkpoint(int argc, char **argv)
 
 #define	CHECKPOINT_OPT	1024
 
+/*
+ * zpool ddtload <pool>
+ *
+ * Prefetches the dedup table (DDT) of the specified pool.
+ */
+int
+zpool_do_ddtload(int argc, char **argv)
+{
+	char *pool;
+	zpool_handle_t *zhp;
+	int err;
+
+	argc--;
+	argv++;
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	pool = argv[0];
+
+	if ((zhp = zpool_open(g_zfs, pool)) == NULL) {
+		/* As a special case, check for use of '/' in the name */
+		if (strchr(pool, '/') != NULL) {
+			(void) fprintf(stderr, gettext("'zpool ddtload' "
+			    "doesn't work on datasets.\n"));
+		}
+		return (1);
+	}
+
+	err = zpool_ddtload(zhp);
+	zpool_close(zhp);
+
+	return (err);
+}
+
 /*
  * zpool import [-d dir] [-D]
  *        import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
diff --git a/include/libzfs.h b/include/libzfs.h
index a5b2a8393f43..e655eb90a3c6 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -438,6 +438,7 @@ extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
     nvlist_t *);
 extern int zpool_checkpoint(zpool_handle_t *);
 extern int zpool_discard_checkpoint(zpool_handle_t *);
+extern int zpool_ddtload(zpool_handle_t *);
 
 /*
  * Basic handle manipulations.  These functions do not create or destroy the
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index 74a64d10777d..8dedc048ac22 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -120,6 +120,8 @@ int lzc_reopen(const char *, boolean_t);
 int lzc_pool_checkpoint(const char *);
 int lzc_pool_checkpoint_discard(const char *);
 
+int lzc_pool_ddtload(const char *);
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/include/sys/ddt.h b/include/sys/ddt.h
index fb1445d8d4b0..804aab10e30d 100644
--- a/include/sys/ddt.h
+++ b/include/sys/ddt.h
@@ -163,6 +163,7 @@ typedef struct ddt_ops {
 	int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
 	    boolean_t prehash);
 	int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
+	void (*ddt_op_loadall)(objset_t *os, uint64_t object);
 	int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
 	void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
 	    ddt_entry_t *dde);
@@ -229,6 +230,7 @@ extern void ddt_exit(ddt_t *ddt);
 extern void ddt_init(void);
 extern void ddt_fini(void);
 extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
+extern void ddt_loadall(ddt_t *ddt);
 extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
 extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
 
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index c167a594a7d4..69befab74158 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1272,6 +1272,7 @@ typedef enum zfs_ioc {
 	ZFS_IOC_POOL_DISCARD_CHECKPOINT,	/* 0x5a4e */
 	ZFS_IOC_POOL_INITIALIZE,		/* 0x5a4f */
 	ZFS_IOC_POOL_TRIM,			/* 0x5a50 */
+	ZFS_IOC_POOL_DDTLOAD,			/* 0x5a51 */
 
 	/*
 	 * Linux - 3/64 numbers reserved.
diff --git a/include/sys/zap.h b/include/sys/zap.h
index b19b4643879c..d8480b7ba2d3 100644
--- a/include/sys/zap.h
+++ b/include/sys/zap.h
@@ -220,6 +220,7 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
 int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
 int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
+int zap_prefetch_object(objset_t *os, uint64_t zapobj);
 int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints);
 
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index a6e26ebcd4d8..3c5315d5eb66 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -1469,6 +1469,27 @@ zpool_discard_checkpoint(zpool_handle_t *zhp)
 	return (0);
 }
 
+/*
+ * Prefetch the dedup table (DDT) of the given pool; returns 0 or -1.
+ */
+int
+zpool_ddtload(zpool_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+	int error;
+
+	error = lzc_pool_ddtload(zhp->zpool_name);
+	if (error != 0) {
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot load DDT in '%s'"), zhp->zpool_name);
+		(void) zpool_standard_error(hdl, error, msg);
+		return (-1);
+	}
+
+	return (0);
+}
+
 /*
  * Add the given vdevs to the pool.  The caller must have already performed the
  * necessary verification to ensure that the vdev specification is well-formed.
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index eb332bc94e8c..dc159582f6f1 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -1302,6 +1302,24 @@ lzc_pool_checkpoint_discard(const char *pool)
 	return (error);
 }
 
+/*
+ * Ask the kernel to prefetch the specified pool's dedup table (DDT).
+ */
+int
+lzc_pool_ddtload(const char *pool)
+{
+	int error;
+	nvlist_t *result = NULL;
+	nvlist_t *args = fnvlist_alloc();
+
+	error = lzc_ioctl(ZFS_IOC_POOL_DDTLOAD, pool, args, &result);
+
+	fnvlist_free(args);
+	fnvlist_free(result);
+
+	return (error);
+}
+
 /*
  * Executes a read-only channel program.
  *
diff --git a/man/man8/zpool.8 b/man/man8/zpool.8
index cdf166ccbb00..a57dc09b0f76 100644
--- a/man/man8/zpool.8
+++ b/man/man8/zpool.8
@@ -64,6 +64,9 @@
 .Op Fl R Ar root
 .Ar pool vdev Ns ...
 .Nm
+.Cm ddtload
+.Ar pool
+.Nm
 .Cm destroy
 .Op Fl f
 .Ar pool
@@ -1239,6 +1242,13 @@ block devices.
 .El
 .It Xo
 .Nm
+.Cm ddtload
+.Ar pool
+.Xc
+Prefetches the pool's deduplication table (DDT) into the ARC, as far as the
+ARC can hold it, so that writes need not wait for on-demand DDT reads.
+.It Xo
+.Nm
 .Cm destroy
 .Op Fl f
 .Ar pool
diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c
index a38c2b24ea2c..46466703a525 100644
--- a/module/zfs/ddt.c
+++ b/module/zfs/ddt.c
@@ -170,6 +170,15 @@ ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
 	ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
 }
 
+static void
+ddt_object_loadall(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+{
+	if (ddt_object_exists(ddt, type, class)) {
+		ddt_ops[type]->ddt_op_loadall(ddt->ddt_os,
+		    ddt->ddt_object[type][class]);
+	}
+}
+
 static int
 ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
     ddt_entry_t *dde)
@@ -731,6 +740,24 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
 	ddt_free(dde);
 }
 
+void
+ddt_loadall(ddt_t *ddt)
+{
+	enum ddt_type type;
+	enum ddt_class class;
+
+	/*
+	 * Load all DDT entries for each type/class combination. This is
+	 * intended to perform a prefetch on all such blocks. For the same
+	 * reason that ddt_prefetch isn't locked, this is also not locked.
+	 */
+	for (type = 0; type < DDT_TYPES; type++) {
+		for (class = 0; class < DDT_CLASSES; class++) {
+			ddt_object_loadall(ddt, type, class);
+		}
+	}
+}
+
 ddt_entry_t *
 ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
 {
diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c
index 3489d31d9c9e..9b6fe3d9c731 100644
--- a/module/zfs/ddt_zap.c
+++ b/module/zfs/ddt_zap.c
@@ -55,16 +55,23 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
 	return (zap_destroy(os, object, tx));
 }
 
+static void
+ddt_zap_loadall(objset_t *os, uint64_t object)
+{
+
+	(void) zap_prefetch_object(os, object);
+	return;
+}
+
 static int
 ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
 {
 	uchar_t *cbuf;
-	uint64_t one, csize;
 	int error;
+	uint64_t one, csize;
 
 	cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_SLEEP);
-
-	error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
+	error = zap_length_uint64(os, object, (const uint64_t *)&dde->dde_key,
 	    DDT_KEY_WORDS, &one, &csize);
 	if (error)
 		goto out;
@@ -78,9 +85,9 @@ ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
 		goto out;
 
 	ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys));
+
 out:
 	kmem_free(cbuf, sizeof (dde->dde_phys) + 1);
-
 	return (error);
 }
 
@@ -159,6 +166,7 @@ const ddt_ops_t ddt_zap_ops = {
 	"zap",
 	ddt_zap_create,
 	ddt_zap_destroy,
+	ddt_zap_loadall,
 	ddt_zap_lookup,
 	ddt_zap_prefetch,
 	ddt_zap_update,
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
index 467812ff637c..0069ace73dbb 100644
--- a/module/zfs/zap_micro.c
+++ b/module/zfs/zap_micro.c
@@ -1044,6 +1044,22 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
 	return (err);
 }
 
+int
+zap_prefetch_object(objset_t *os, uint64_t zapobj)
+{
+	int error;
+	dmu_object_info_t doi;
+
+	error = dmu_object_info(os, zapobj, &doi);
+	if (error == 0 && DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
+		error = SET_ERROR(EINVAL);
+	if (error == 0) {
+		dmu_prefetch(os, zapobj, /*level*/ 0, /*offset*/ 0,
+		    doi.doi_max_offset, ZIO_PRIORITY_SYNC_READ);
+	}
+	return (error);
+}
+
 int
 zap_lookup_by_dnode(dnode_t *dn, const char *name,
     uint64_t integer_size, uint64_t num_integers, void *buf)
@@ -1665,6 +1681,7 @@ EXPORT_SYMBOL(zap_lookup_uint64);
 EXPORT_SYMBOL(zap_contains);
 EXPORT_SYMBOL(zap_prefetch);
 EXPORT_SYMBOL(zap_prefetch_uint64);
+EXPORT_SYMBOL(zap_prefetch_object);
 EXPORT_SYMBOL(zap_add);
 EXPORT_SYMBOL(zap_add_by_dnode);
 EXPORT_SYMBOL(zap_add_uint64);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 08d405cb8546..8942a36cb1c4 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -3829,6 +3829,34 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
 	return (spa_checkpoint_discard(poolname));
 }
 
+/*
+ * innvl: unused
+ * outnvl: empty
+ */
+static const zfs_ioc_key_t zfs_keys_pool_ddtload[] = {
+	/* no nvl keys */
+};
+
+/* ARGSUSED */
+static int
+zfs_ioc_pool_ddtload(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int error;
+	spa_t *spa;
+
+	error = spa_open(poolname, &spa, FTAG);
+	if (error != 0)
+		return (error);
+
+	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+		ddt_loadall(spa->spa_ddt[c]);
+	}
+
+	spa_close(spa, FTAG);
+
+	return (error);
+}
+
 /*
  * inputs:
  * zc_name		name of dataset to destroy
@@ -6660,6 +6688,12 @@ zfs_ioctl_init(void)
 	    zfs_keys_pool_discard_checkpoint,
 	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
 
+	zfs_ioctl_register("zpool_ddtload",
+	    ZFS_IOC_POOL_DDTLOAD, zfs_ioc_pool_ddtload,
+	    zfs_secpolicy_config, POOL_NAME,
+	    POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
+	    zfs_keys_pool_ddtload, ARRAY_SIZE(zfs_keys_pool_ddtload));
+
 	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
 	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,