Skip to content

Commit

Permalink
Add DDT prune command
Browse files Browse the repository at this point in the history
Requires the new 'flat' physical data which has the start
time for a class entry.

The amount to prune can be based on a target percentage of
the unique entries or based on the age (i.e., every entry
older than N days).

Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.

Signed-off-by: Don Brady <don.brady@klarasystems.com>
  • Loading branch information
don-brady committed Aug 29, 2024
1 parent 6a87877 commit b363a90
Show file tree
Hide file tree
Showing 21 changed files with 905 additions and 85 deletions.
55 changes: 45 additions & 10 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -2045,7 +2045,7 @@ dump_all_ddts(spa_t *spa)

for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (!ddt)
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
continue;
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES;
Expand All @@ -2072,6 +2072,32 @@ dump_all_ddts(spa_t *spa)
}

dump_dedup_ratio(&dds_total);

/*
* Dump a histogram of unique class entry age
*/
if (dump_opt['D'] == 3 && getenv("ZDB_DDT_UNIQUE_AGE_HIST") != NULL) {
ddt_age_histo_t histogram;

(void) printf("DDT walk unique, building age histogram...\n");
ddt_prune_walk(spa, 0, &histogram);

/*
* print out histogram for unique entry class birth
*/
if (histogram.dah_entries > 0) {
(void) printf("%5s %9s %4s\n",
"age", "blocks", "amnt");
(void) printf("%5s %9s %4s\n",
"-----", "---------", "----");
for (int i = 0; i < HIST_BINS; i++) {
(void) printf("%5d %9d %4d%%\n", 1 << i,
(int)histogram.dah_age_histo[i],
(int)((histogram.dah_age_histo[i] * 100) /
histogram.dah_entries));
}
}
}
}

static void
Expand Down Expand Up @@ -5749,12 +5775,17 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
ddt_entry_t *dde = ddt_lookup(ddt, bp);

/*
* ddt_lookup() can only return NULL if this block didn't exist
* ddt_lookup() can return NULL if this block didn't exist
* in the DDT and creating it would take the DDT over its
* quota. Since we got the block from disk, it must exist in
* the DDT, so this can't happen.
* the DDT, so this can't happen. However, when unique entries
* are pruned, the dedup bit can be set with no corresponding
* entry in the DDT.
*/
VERIFY3P(dde, !=, NULL);
if (dde == NULL) {
ddt_exit(ddt);
goto skipped;
}

/* Get the phys for this variant */
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
Expand All @@ -5774,8 +5805,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
(void *)(((uintptr_t)dde->dde_io) | (1 << v));

/* Consume a reference for this block. */
VERIFY3U(ddt_phys_total_refcnt(ddt, dde->dde_phys), >, 0);
ddt_phys_decref(dde->dde_phys, v);
if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
ddt_phys_decref(dde->dde_phys, v);

/*
* If this entry has a single flat phys, it may have been
Expand Down Expand Up @@ -5864,6 +5895,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
}
}

skipped:
for (i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
int t = (i & 1) ? type : ZDB_OT_TOTAL;
Expand Down Expand Up @@ -8138,7 +8170,7 @@ dump_mos_leaks(spa_t *spa)

for (uint64_t c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (!ddt)
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
continue;

/* DDT store objects */
Expand All @@ -8150,11 +8182,14 @@ dump_mos_leaks(spa_t *spa)
}

/* FDT container */
mos_obj_refd(ddt->ddt_dir_object);
if (ddt->ddt_version == DDT_VERSION_FDT)
mos_obj_refd(ddt->ddt_dir_object);

/* FDT log objects */
mos_obj_refd(ddt->ddt_log[0].ddl_object);
mos_obj_refd(ddt->ddt_log[1].ddl_object);
if (ddt->ddt_flags & DDT_FLAG_LOG) {
mos_obj_refd(ddt->ddt_log[0].ddl_object);
mos_obj_refd(ddt->ddt_log[1].ddl_object);
}
}

if (spa->spa_brt != NULL) {
Expand Down
89 changes: 89 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ static int zpool_do_version(int, char **);

static int zpool_do_wait(int, char **);

static int zpool_do_ddt_prune(int, char **);

static int zpool_do_help(int argc, char **argv);

static zpool_compat_status_t zpool_do_load_compat(
Expand Down Expand Up @@ -170,6 +172,7 @@ typedef enum {
HELP_CLEAR,
HELP_CREATE,
HELP_CHECKPOINT,
HELP_DDT_PRUNE,
HELP_DESTROY,
HELP_DETACH,
HELP_EXPORT,
Expand Down Expand Up @@ -426,6 +429,8 @@ static zpool_command_t command_table[] = {
{ "sync", zpool_do_sync, HELP_SYNC },
{ NULL },
{ "wait", zpool_do_wait, HELP_WAIT },
{ NULL },
{ "ddtprune", zpool_do_ddt_prune, HELP_DDT_PRUNE },
};

#define NCOMMAND (ARRAY_SIZE(command_table))
Expand Down Expand Up @@ -545,6 +550,8 @@ get_usage(zpool_help_t idx)
case HELP_WAIT:
return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
"<pool> [interval]\n"));
case HELP_DDT_PRUNE:
return (gettext("\tddtprune -d|-p <amount> <pool>\n"));
default:
__builtin_unreachable();
}
Expand Down Expand Up @@ -13342,6 +13349,88 @@ found:;
return (error);
}

/*
* zpool ddtprune -d|-p <amount> <pool>
*
* -d <days> Prune entries <days> old and older
* -p <percent> Prune <percent> amount of entries
*
* Prune single reference entries from DDT to satisfy the amount specified.
*/
int
zpool_do_ddt_prune(int argc, char **argv)
{
zpool_ddt_prune_unit_t unit = ZPOOL_DDT_PRUNE_NONE;
uint64_t amount = 0;
zpool_handle_t *zhp;
char *endptr;
int c;

while ((c = getopt(argc, argv, "d:p:")) != -1) {
switch (c) {
case 'd':
if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
(void) fprintf(stderr, gettext("-d cannot be "
"combined with -p option\n"));
usage(B_FALSE);
}
errno = 0;
amount = strtoull(optarg, &endptr, 0);
if (errno != 0 || *endptr != '\0' || amount == 0) {
(void) fprintf(stderr,
gettext("invalid days value\n"));
usage(B_FALSE);
}
amount *= 86400; /* convert days to seconds */
unit = ZPOOL_DDT_PRUNE_AGE;
break;
case 'p':
if (unit == ZPOOL_DDT_PRUNE_AGE) {
(void) fprintf(stderr, gettext("-p cannot be "
"combined with -d option\n"));
usage(B_FALSE);
}
errno = 0;
amount = strtoull(optarg, &endptr, 0);
if (errno != 0 || *endptr != '\0' ||
amount == 0 || amount > 100) {
(void) fprintf(stderr,
gettext("invalid percentage value\n"));
usage(B_FALSE);
}
unit = ZPOOL_DDT_PRUNE_PERCENTAGE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
}
}
argc -= optind;
argv += optind;

if (unit == ZPOOL_DDT_PRUNE_NONE) {
(void) fprintf(stderr,
gettext("missing amount option (-d|-p <value>)\n"));
usage(B_FALSE);
} else if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool argument\n"));
usage(B_FALSE);
} else if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
zhp = zpool_open(g_zfs, argv[0]);
if (zhp == NULL)
return (-1);

int error = zpool_ddt_prune(zhp, unit, amount);

zpool_close(zhp);

return (error);
}

static int
find_command_idx(const char *command, int *idx)
{
Expand Down
28 changes: 28 additions & 0 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ extern unsigned long zio_decompress_fail_fraction;
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
extern uint64_t raidz_expand_max_reflow_bytes;
extern uint_t raidz_expand_pause_point;
extern boolean_t ddt_prune_artificial_age;
extern boolean_t ddt_dump_prune_histogram;


static ztest_shared_opts_t *ztest_shared_opts;
Expand Down Expand Up @@ -446,6 +448,7 @@ ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
ztest_func_t ztest_pool_prefetch_ddt;
ztest_func_t ztest_ddt_prune;

static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
Expand Down Expand Up @@ -502,6 +505,7 @@ static ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
};

#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
Expand Down Expand Up @@ -7288,6 +7292,17 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
mutex_exit(&ztest_vdev_lock);
}

void
ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
{
(void) zd, (void) id;

spa_t *spa = ztest_spa;
uint64_t pct = ztest_random(15) + 1;

(void) ddt_prune_unique_entries(spa, ZPOOL_DDT_PRUNE_PERCENTAGE, pct);
}

/*
* Verify pool integrity by running zdb.
*/
Expand Down Expand Up @@ -7469,6 +7484,13 @@ ztest_resume_thread(void *arg)
{
spa_t *spa = arg;

/*
* Synthesize aged DDT entries for ddt prune testing
*/
ddt_prune_artificial_age = B_TRUE;
if (ztest_opts.zo_verbose >= 3)
ddt_dump_prune_histogram = B_TRUE;

while (!ztest_exiting) {
if (spa_suspended(spa))
ztest_resume(spa);
Expand Down Expand Up @@ -8587,6 +8609,12 @@ ztest_init(ztest_shared_t *zs)
if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
continue;

/*
* split 50/50 between legacy and fast dedup
*/
if (i == SPA_FEATURE_FAST_DEDUP && ztest_random(2) != 0)
continue;

VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
spa_feature_table[i].fi_uname));
fnvlist_add_uint64(props, buf, 0);
Expand Down
1 change: 1 addition & 0 deletions contrib/debian/openzfs-zfsutils.install
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ usr/share/man/man8/zpool-clear.8
usr/share/man/man8/zpool-create.8
usr/share/man/man8/zpool-destroy.8
usr/share/man/man8/zpool-detach.8
usr/share/man/man8/zpool-ddtprune.8
usr/share/man/man8/zpool-events.8
usr/share/man/man8/zpool-export.8
usr/share/man/man8/zpool-get.8
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ _LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);

_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);

_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
uint64_t);

_LIBZFS_H int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
_LIBZFS_H int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);

_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);

_LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t,
uint64_t);

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 3 additions & 0 deletions include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,9 @@ extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,

extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);

extern int ddt_prune_unique_entries(spa_t *spa, zpool_ddt_prune_unit_t unit,
uint64_t amount);

#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit b363a90

Please sign in to comment.