Skip to content

Commit

Permalink
Improvements on persistent L2ARC
Browse files Browse the repository at this point in the history
Multiple improvements on code readability and corrected comments.

Issue #10224 raised the concern of whether we are correctly accounting
for the size of L2ARC log blocks. We now implement a new arcstat
"l2_log_blk_asize" which reflects how l2arc_evict() and
l2arc_log_blk_commit() account for the size of L2ARC log blocks. This
is also written to the header of the cache device as dh_lb_asize. zdb
can compare dh_lb_asize to the sum of all aligned sizes as reported
by l2arc_rebuild(). This provides valuable information as to whether
the first two functions evict/commit log blocks that are not seen by
l2arc_rebuild(). If those two values are not equal, zdb exits with an
error. For this feature to work correctly, there should be no active
writes on the cache device. This is also employed in the tests of
persistent L2ARC. Since we modify the structure of the cache device
header, this change breaks backward compatibility in terms of
persistent L2ARC.

In l2arc_rebuild_vdev(), if the number of committed log entries in a log
block is 0 and the device header is valid, we update the device header.
This will facilitate trimming of the whole device in this case when
TRIM for L2ARC is implemented.

Signed-off-by: George Amanakis <gamanakis@gmail.com>
  • Loading branch information
gamanakis committed Apr 19, 2020
1 parent c614fd6 commit c61a294
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 84 deletions.
78 changes: 52 additions & 26 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -3533,7 +3533,7 @@ dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps)
(u_longlong_t)lbps.lbp_payload_start);
(void) printf("|\t\tlsize: %llu\n",
(u_longlong_t)L2BLK_GET_LSIZE((&lbps)->lbp_prop));
(void) printf("|\t\tpsize: %llu\n",
(void) printf("|\t\tasize: %llu\n",
(u_longlong_t)L2BLK_GET_PSIZE((&lbps)->lbp_prop));
(void) printf("|\t\tcompralgo: %llu\n",
(u_longlong_t)L2BLK_GET_COMPRESS((&lbps)->lbp_prop));
Expand All @@ -3542,18 +3542,19 @@ dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps)
(void) printf("|\n\n");
}

static void
static uint64_t
dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
{
l2arc_log_blk_phys_t this_lb;
uint64_t psize;
uint64_t asize, tot_asize = 0;
l2arc_log_blkptr_t lbps[2];
abd_t *abd;
zio_cksum_t cksum;
int i = 0, failed = 0;
l2arc_dev_t dev;

print_l2arc_log_blocks();
if (!dump_opt['q'])
print_l2arc_log_blocks();
bcopy((&l2dhdr)->dh_start_lbps, lbps, sizeof (lbps));

dev.l2ad_evict = l2dhdr.dh_evict;
Expand All @@ -3562,9 +3563,11 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)

if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) {
/* no log blocks to read */
(void) printf("No log blocks to read\n");
(void) printf("\n");
return;
if (!dump_opt['q']) {
(void) printf("No log blocks to read\n");
(void) printf("\n");
}
return (0);
} else {
dev.l2ad_hand = lbps[0].lbp_daddr +
L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
Expand All @@ -3576,29 +3579,35 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
if (!l2arc_log_blkptr_valid(&dev, &lbps[0]))
break;

psize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
if (pread64(fd, &this_lb, psize, lbps[0].lbp_daddr) != psize) {
(void) printf("Error while reading next log block\n\n");
/* L2BLK_GET_PSIZE returns aligned size for log blocks */
asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) {
if (!dump_opt['q']) {
(void) printf("Error while reading next log "
"block\n\n");
}
break;
}

fletcher_4_native_varsize(&this_lb, psize, &cksum);
fletcher_4_native_varsize(&this_lb, asize, &cksum);
if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) {
failed++;
(void) printf("Invalid cksum\n");
dump_l2arc_log_blkptr(lbps[0]);
if (!dump_opt['q']) {
(void) printf("Invalid cksum\n");
dump_l2arc_log_blkptr(lbps[0]);
}
break;
}

switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
case ZIO_COMPRESS_OFF:
break;
case ZIO_COMPRESS_LZ4:
abd = abd_alloc_for_io(psize, B_TRUE);
abd_copy_from_buf_off(abd, &this_lb, 0, psize);
abd = abd_alloc_for_io(asize, B_TRUE);
abd_copy_from_buf_off(abd, &this_lb, 0, asize);
zio_decompress_data(L2BLK_GET_COMPRESS(
(&lbps[0])->lbp_prop), abd, &this_lb,
psize, sizeof (this_lb));
asize, sizeof (this_lb));
abd_free(abd);
break;
default:
Expand All @@ -3608,18 +3617,20 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
byteswap_uint64_array(&this_lb, sizeof (this_lb));
if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) {
(void) printf("Invalid log block magic\n\n");
if (!dump_opt['q'])
(void) printf("Invalid log block magic\n\n");
break;
}

i++;
if (dump_opt['l'] > 1) {
tot_asize += asize;
if (dump_opt['l'] > 1 && !dump_opt['q']) {
(void) printf("lb[%4d]\tmagic: %llu\n", i,
(u_longlong_t)this_lb.lb_magic);
dump_l2arc_log_blkptr(lbps[0]);
}

if (dump_opt['l'] > 2)
if (dump_opt['l'] > 2 && !dump_opt['q'])
dump_l2arc_log_entries(l2dhdr.dh_log_blk_ent,
this_lb.lb_entries, i);

Expand All @@ -3631,15 +3642,22 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
lbps[1] = this_lb.lb_prev_lbp;
}

(void) printf("log_blk_count:\t %d with valid cksum\n", i);
(void) printf("\t\t %d with invalid cksum\n\n", failed);
if (!dump_opt['q']) {
(void) printf("log_blk_count:\t %d with valid cksum\n", i);
(void) printf("\t\t %d with invalid cksum\n", failed);
(void) printf("log_blk_asize:\t %llu\n\n",
(u_longlong_t)tot_asize);
}

return (tot_asize);
}

static void
static int
dump_l2arc_header(int fd)
{
l2arc_dev_hdr_phys_t l2dhdr;
int error = B_FALSE;
uint64_t lb_asize;

if (pread64(fd, &l2dhdr, sizeof (l2dhdr),
VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) {
Expand All @@ -3654,6 +3672,8 @@ dump_l2arc_header(int fd)

if (error) {
(void) printf("L2ARC device header not found\n\n");
/* Do not return an error here for backward compatibility */
return (0);
} else if (!dump_opt['q']) {
print_l2arc_header();

Expand All @@ -3677,11 +3697,17 @@ dump_l2arc_header(int fd)
(u_longlong_t)l2dhdr.dh_start);
(void) printf(" end: %llu\n",
(u_longlong_t)l2dhdr.dh_end);
(void) printf(" evict: %llu\n\n",
(void) printf(" evict: %llu\n",
(u_longlong_t)l2dhdr.dh_evict);

dump_l2arc_log_blocks(fd, l2dhdr);
(void) printf(" lb_asize: %llu\n\n",
(u_longlong_t)l2dhdr.dh_lb_asize);
}

lb_asize = dump_l2arc_log_blocks(fd, l2dhdr);
if (lb_asize != l2dhdr.dh_lb_asize)
return (1);

return (0);
}

static void
Expand Down Expand Up @@ -4009,7 +4035,7 @@ dump_label(const char *dev)
* Dump the L2ARC header, if existent.
*/
if (read_l2arc_header)
dump_l2arc_header(fd);
error |= dump_l2arc_header(fd);

cookie = NULL;
while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
Expand Down
30 changes: 20 additions & 10 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ typedef struct l2arc_log_blkptr {
/*
* lbp_prop has the following format:
* * logical size (in bytes)
* * physical (compressed) size (in bytes)
* * aligned (after compression) size (in bytes)
* * compression algorithm (we always LZ4-compress l2arc logs)
* * checksum algorithm (used for lbp_cksum)
*/
Expand Down Expand Up @@ -228,15 +228,19 @@ typedef struct l2arc_dev_hdr_phys {
* Used in zdb.c for determining if a log block is valid, in the same
* way that l2arc_rebuild() does.
*/
uint64_t dh_start;
uint64_t dh_end;
uint64_t dh_start; /* mirror of l2ad_start */
uint64_t dh_end; /* mirror of l2ad_end */
/*
* Aligned size of all log blocks as accounted by vdev_space_update().
*/
uint64_t dh_lb_asize;

/*
* Start of log block chain. [0] -> newest log, [1] -> one older (used
* for initiating prefetch).
*/
l2arc_log_blkptr_t dh_start_lbps[2];
const uint64_t dh_pad[34]; /* pad to 512 bytes */
const uint64_t dh_pad[33]; /* pad to 512 bytes */
zio_eck_t dh_tail;
} l2arc_dev_hdr_phys_t;
CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
Expand Down Expand Up @@ -387,6 +391,10 @@ typedef struct l2arc_dev {
uint64_t l2ad_evict; /* evicted offset in bytes */
/* List of pointers to log blocks present in the L2ARC device */
list_t l2ad_lbptr_list;
/*
* Aligned size of all log blocks as accounted by vdev_space_update().
*/
uint64_t l2ad_lb_asize;
} l2arc_dev_t;

/*
Expand Down Expand Up @@ -738,14 +746,16 @@ typedef struct arc_stats {
*/
kstat_named_t arcstat_l2_log_blk_writes;
/*
* Moving average of the physical size of the L2ARC log blocks, in
* Moving average of the aligned size of the L2ARC log blocks, in
* bytes. Updated during L2ARC rebuild and during writing of L2ARC
* log blocks.
*/
kstat_named_t arcstat_l2_log_blk_avg_size;
kstat_named_t arcstat_l2_log_blk_avg_asize;
/* Aligned size of L2ARC log blocks on L2ARC devices. */
kstat_named_t arcstat_l2_log_blk_asize;
/*
* Moving average of the physical size of L2ARC restored data, in bytes,
* to the physical size of their metadata in ARC, in bytes.
* Moving average of the aligned size of L2ARC restored data, in bytes,
* to the aligned size of their metadata in ARC, in bytes.
* Updated during L2ARC rebuild and during writing of L2ARC log blocks.
*/
kstat_named_t arcstat_l2_data_to_meta_ratio;
Expand Down Expand Up @@ -780,6 +790,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_rebuild_abort_lowmem;
/* Logical size of L2ARC restored data, in bytes. */
kstat_named_t arcstat_l2_rebuild_size;
/* Aligned size of L2ARC restored data, in bytes. */
kstat_named_t arcstat_l2_rebuild_asize;
/*
* Number of L2ARC log entries (buffers) that were successfully
* restored in ARC.
Expand All @@ -790,8 +802,6 @@ typedef struct arc_stats {
* were not restored again.
*/
kstat_named_t arcstat_l2_rebuild_bufs_precached;
/* Physical size of L2ARC restored data, in bytes. */
kstat_named_t arcstat_l2_rebuild_psize;
/*
* Number of L2ARC log blocks that were restored successfully. Each
* log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
Expand Down
5 changes: 4 additions & 1 deletion man/man8/zdb.8
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,10 @@ Read the vdev labels and L2ARC header from the specified device.
.Nm Fl l
will return 0 if a valid label was found, 1 if an error occurred, and 2 if no
valid labels were found. The presence of an L2ARC header is indicated by a specific
sequence (L2ARC_DEV_HDR_MAGIC). Each unique configuration is displayed only
sequence (L2ARC_DEV_HDR_MAGIC). If there is an error in accounting for the size
of L2ARC log blocks
.Nm Fl l
will return 1. Each unique configuration is displayed only
once.
.It Fl ll Ar device
In addition display label space usage stats. If a valid L2ARC header was found
Expand Down
Loading

0 comments on commit c61a294

Please sign in to comment.