Skip to content

Commit

Permalink
Trim L2ARC
Browse files Browse the repository at this point in the history
The l2arc_evict() function is responsible for evicting buffers which
reference the next bytes of the L2ARC device to be overwritten. Teach
this function to additionally TRIM that vdev space before it is
overwritten if the device has been filled with data.

We implement a new type of TRIM, TRIM_TYPE_SIMPLE, to be used for TRIM
issued on cache devices. We also implement a "Trim Ahead" feature.
It is a zfs module parameter, expressed in % of the current write size,
and has a minimum of 64MB. This trims ahead of the current evict hand.
The default is 0 which disables TRIM on L2ARC. To enable TRIM on L2ARC
we set l2arc_trim_ahead > 0.

We also implement TRIM of the whole cache device upon addition to a
pool, pool creation or when the header of the device is invalid upon
importing a pool or onlining a cache device. This is dependent on
l2arc_trim_ahead > 0. TRIM of the whole device is done with
TRIM_TYPE_MANUAL so that its status can be monitored by zpool status -t.
We save the TRIM state for the whole device and the time of completion
on-disk in the header, and restore these upon L2ARC rebuild so that
zpool status -t can correctly report them. Whole device TRIM is done
asynchronously so that the user can export the pool or remove the
cache device while it is trimming (i.e. if it is too slow).

We do not TRIM the whole device if persistent L2ARC has been disabled by
l2arc_rebuild_enabled = 0 because we may not want to lose all cached
buffers (eg we may want to import the pool with
l2arc_rebuild_enabled = 0 only once because of memory pressure). If
persistent L2ARC has been disabled by setting the module parameter
l2arc_rebuild_blocks_min_l2size to a value greater than the size of the
cache device then the whole device is trimmed upon creation or import of
a pool if l2arc_trim_ahead > 0.

Signed-off-by: George Amanakis <gamanakis@gmail.com>
  • Loading branch information
gamanakis committed May 31, 2020
1 parent 3bf3b16 commit 032f438
Show file tree
Hide file tree
Showing 19 changed files with 557 additions and 51 deletions.
6 changes: 5 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -3707,8 +3707,12 @@ dump_l2arc_header(int fd)
(u_longlong_t)l2dhdr.dh_evict);
(void) printf(" lb_asize_refcount: %llu\n",
(u_longlong_t)l2dhdr.dh_lb_asize);
(void) printf(" lb_count_refcount: %llu\n\n",
(void) printf(" lb_count_refcount: %llu\n",
(u_longlong_t)l2dhdr.dh_lb_count);
(void) printf(" trim_action_time: %llu\n",
(u_longlong_t)l2dhdr.dh_trim_action_time);
(void) printf(" trim_state: %llu\n\n",
(u_longlong_t)l2dhdr.dh_trim_state);
}

dump_l2arc_log_blocks(fd, l2dhdr, &rebuild);
Expand Down
14 changes: 13 additions & 1 deletion include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,14 @@ typedef struct l2arc_dev_hdr_phys {
*/
uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */
uint64_t dh_lb_count; /* mirror of l2ad_lb_count */
const uint64_t dh_pad[32]; /* pad to 512 bytes */
/*
* Mirrors of vdev_trim_action_time and vdev_trim_state, used to
* display when the cache device was fully trimmed for the last
* time.
*/
uint64_t dh_trim_action_time;
uint64_t dh_trim_state;
const uint64_t dh_pad[30]; /* pad to 512 bytes */
zio_eck_t dh_tail;
} l2arc_dev_hdr_phys_t;
CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
Expand Down Expand Up @@ -399,6 +406,7 @@ typedef struct l2arc_dev {
* Number of log blocks present on the device.
*/
zfs_refcount_t l2ad_lb_count;
boolean_t l2ad_trim_all; /* TRIM whole device */
} l2arc_dev_t;

/*
Expand Down Expand Up @@ -902,6 +910,10 @@ extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
const l2arc_log_blkptr_t *lbp);

/* used in vdev_trim.c */
void l2arc_dev_hdr_update(l2arc_dev_t *dev);
l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);

#ifdef __cplusplus
}
#endif
Expand Down
8 changes: 8 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,7 @@ typedef enum {
/*
 * Kind of TRIM being issued.  The type selects which set of per-pool
 * iostat counters (trim_*, autotrim_* or simple_trim_*) a TRIM is
 * accounted under in spa_iostats_trim_add().
 * NOTE(review): vdev_trim_inflight[] is sized to 3 in this change,
 * presumably indexed by this enum -- confirm before renumbering values.
 */
typedef enum trim_type {
/* Manual TRIM; also used for whole cache device TRIM (zpool status -t). */
TRIM_TYPE_MANUAL = 0,
/* Automatic TRIM; accounted under the autotrim_* counters. */
TRIM_TYPE_AUTO = 1,
/* TRIM issued on cache (L2ARC) devices; simple_trim_* counters. */
TRIM_TYPE_SIMPLE = 2
} trim_type_t;

/* state manipulation functions */
Expand Down Expand Up @@ -788,6 +789,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
#define SPA_ASYNC_TRIM_RESTART 0x200
#define SPA_ASYNC_AUTOTRIM_RESTART 0x400
#define SPA_ASYNC_L2CACHE_REBUILD 0x800
#define SPA_ASYNC_L2CACHE_TRIM 0x1000

/*
* Controls the behavior of spa_vdev_remove().
Expand Down Expand Up @@ -940,6 +942,12 @@ typedef struct spa_iostats {
kstat_named_t autotrim_bytes_skipped;
kstat_named_t autotrim_extents_failed;
kstat_named_t autotrim_bytes_failed;
kstat_named_t simple_trim_extents_written;
kstat_named_t simple_trim_bytes_written;
kstat_named_t simple_trim_extents_skipped;
kstat_named_t simple_trim_bytes_skipped;
kstat_named_t simple_trim_extents_failed;
kstat_named_t simple_trim_bytes_failed;
} spa_iostats_t;

extern void spa_stats_init(spa_t *spa);
Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ typedef enum {
} vdev_labeltype_t;

extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
void vdev_trim_l2arc(spa_t *spa);

#ifdef __cplusplus
}
Expand Down
2 changes: 1 addition & 1 deletion include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ struct vdev {
uint64_t vdev_initialize_inflight;
kmutex_t vdev_trim_io_lock;
kcondvar_t vdev_trim_io_cv;
uint64_t vdev_trim_inflight[2];
uint64_t vdev_trim_inflight[3];

/*
* Values stored in the config for an indirect or removing vdev.
Expand Down
2 changes: 2 additions & 0 deletions include/sys/vdev_trim.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ extern void vdev_autotrim(spa_t *spa);
extern void vdev_autotrim_stop_all(spa_t *spa);
extern void vdev_autotrim_stop_wait(vdev_t *vd);
extern void vdev_autotrim_restart(spa_t *spa);
extern int vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size,
trim_type_t type);

#ifdef __cplusplus
}
Expand Down
20 changes: 19 additions & 1 deletion man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,29 @@ Default value: \fB2\fR.
.ad
.RS 12n
Scales \fBl2arc_headroom\fR by this percentage when L2ARC contents are being
successfully compressed before writing. A value of 100 disables this feature.
successfully compressed before writing. A value of \fB100\fR disables this
feature.
.sp
Default value: \fB200\fR%.
.RE

.sp
.ne 2
.na
\fBl2arc_trim_ahead\fR (ulong)
.ad
.RS 12n
Trims ahead of the current evict hand on L2ARC devices by this percentage of
write size if we have filled the device. It has a minimum value of 64MB. If
set to \fB100\fR we TRIM twice the space required to accomodate upcoming
writes. It also enables TRIM of the whole L2ARC device upon creation or
addition to an existing pool or if the header of the device is invalid upon
importing a pool or onlining a cache device. A value of \fB0\fR disables TRIM
on L2ARC altogether.
.sp
Default value: \fB0\fR%.
.RE

.sp
.ne 2
.na
Expand Down
2 changes: 2 additions & 0 deletions man/man8/zpoolprops.8
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,8 @@ this property is
Automatic TRIM does not immediately reclaim blocks after a free. Instead,
it will optimistically delay allowing smaller ranges to be aggregated in to
a few larger ones. These can then be issued more efficiently to the storage.
TRIM on L2ARC devices is enabled by setting
.Sy l2arc_trim_ahead > 0 .
.Pp
Be aware that automatic trimming of recently freed data blocks can put
significant stress on the underlying storage devices. This will vary
Expand Down
15 changes: 14 additions & 1 deletion module/os/linux/zfs/spa_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,12 @@ static spa_iostats_t spa_iostats_template = {
{ "autotrim_bytes_skipped", KSTAT_DATA_UINT64 },
{ "autotrim_extents_failed", KSTAT_DATA_UINT64 },
{ "autotrim_bytes_failed", KSTAT_DATA_UINT64 },
{ "simple_trim_extents_written", KSTAT_DATA_UINT64 },
{ "simple_trim_bytes_written", KSTAT_DATA_UINT64 },
{ "simple_trim_extents_skipped", KSTAT_DATA_UINT64 },
{ "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 },
{ "simple_trim_extents_failed", KSTAT_DATA_UINT64 },
{ "simple_trim_bytes_failed", KSTAT_DATA_UINT64 },
};

#define SPA_IOSTATS_ADD(stat, val) \
Expand All @@ -929,13 +935,20 @@ spa_iostats_trim_add(spa_t *spa, trim_type_t type,
SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
} else {
} else if (type == TRIM_TYPE_AUTO) {
SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
} else {
SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
}
}

Expand Down
Loading

0 comments on commit 032f438

Please sign in to comment.