Skip to content

Commit

Permalink
Fix issues with raw receive_write_byref()
Browse files Browse the repository at this point in the history
This patch fixes 2 issues with raw, deduplicated send streams. The
first is that datasets who had been completely received earlier in
the stream were not still marked as raw receives. This caused
problems when newly received datasets attempted to fetch raw data
from these datasets without this flag set.

The second problem was that the arc freeze checksum code was not
consistent about which locks needed to be held while performing
its asserts. The proper locking needed to run these asserts is
actually fairly nuanced, since the asserts touch the linked list
of buffers (requiring the header lock), the arc_state (requiring
the b_evict_lock), and the b_freeze_cksum (requiring the
b_freeze_lock). This seems like a large performance sacrifice and
a lot of unneeded complexity to verify that this relatively small
debug feature is working as intended, so this patch simply removes
these asserts instead.

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7701
  • Loading branch information
Tom Caputi authored and behlendorf committed Aug 20, 2018
1 parent c962fd6 commit 149ce88
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 43 deletions.
45 changes: 13 additions & 32 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1561,6 +1561,9 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
static boolean_t
arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
{
ASSERT(hdr->b_l1hdr.b_state == arc_anon ||
MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));

for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) {
if (!ARC_BUF_COMPRESSED(b)) {
return (B_TRUE);
Expand All @@ -1584,15 +1587,13 @@ arc_cksum_verify(arc_buf_t *buf)
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;

if (ARC_BUF_COMPRESSED(buf)) {
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
if (ARC_BUF_COMPRESSED(buf))
return;
}

ASSERT(HDR_HAS_L1HDR(hdr));

mutex_enter(&hdr->b_l1hdr.b_freeze_lock);

if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) {
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
Expand Down Expand Up @@ -1650,11 +1651,7 @@ arc_cksum_compute(arc_buf_t *buf)
ASSERT(HDR_HAS_L1HDR(hdr));

mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
ASSERT(arc_hdr_has_uncompressed_buf(hdr));
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
} else if (ARC_BUF_COMPRESSED(buf)) {
if (hdr->b_l1hdr.b_freeze_cksum != NULL || ARC_BUF_COMPRESSED(buf)) {
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
}
Expand Down Expand Up @@ -1746,14 +1743,10 @@ arc_buf_thaw(arc_buf_t *buf)
arc_cksum_verify(buf);

/*
* Compressed buffers do not manipulate the b_freeze_cksum or
* allocate b_thawed.
* Compressed buffers do not manipulate the b_freeze_cksum.
*/
if (ARC_BUF_COMPRESSED(buf)) {
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
if (ARC_BUF_COMPRESSED(buf))
return;
}

ASSERT(HDR_HAS_L1HDR(hdr));
arc_cksum_free(hdr);
Expand All @@ -1763,26 +1756,14 @@ arc_buf_thaw(arc_buf_t *buf)
void
arc_buf_freeze(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock;

if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;

if (ARC_BUF_COMPRESSED(buf)) {
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
if (ARC_BUF_COMPRESSED(buf))
return;
}

hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);

ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL ||
hdr->b_l1hdr.b_state == arc_anon);
ASSERT(HDR_HAS_L1HDR(buf->b_hdr));
arc_cksum_compute(buf);
mutex_exit(hash_lock);
}

/*
Expand Down Expand Up @@ -1901,7 +1882,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
void *tmpbuf = NULL;
abd_t *abd = hdr->b_l1hdr.b_pabd;

ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
ASSERT(HDR_AUTHENTICATED(hdr));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);

Expand Down Expand Up @@ -1971,7 +1952,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
boolean_t no_crypt = B_FALSE;
boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);

ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
ASSERT(HDR_ENCRYPTED(hdr));

arc_hdr_alloc_abd(hdr, B_FALSE);
Expand Down Expand Up @@ -2090,7 +2071,7 @@ arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)

ASSERT(HDR_ENCRYPTED(hdr));
ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);

zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data,
Expand Down
1 change: 0 additions & 1 deletion module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,6 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
!DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
err = 0;


DB_DNODE_EXIT(db);

return (err);
Expand Down
9 changes: 7 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1622,6 +1622,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dmu_buf_t *dst_handle;
dmu_buf_impl_t *dstdb;
dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
dmu_object_type_t type;
arc_buf_t *abuf;
uint64_t datalen;
boolean_t byteorder;
Expand All @@ -1637,19 +1638,23 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dstdb = (dmu_buf_impl_t *)dst_handle;
datalen = arc_buf_size(srcdb->db_buf);

DB_DNODE_ENTER(dstdb);
type = DB_DNODE(dstdb)->dn_type;
DB_DNODE_EXIT(dstdb);

/* allocated an arc buffer that matches the type of srcdb->db_buf */
if (arc_is_encrypted(srcdb->db_buf)) {
arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type,
byteorder, salt, iv, mac, type,
datalen, arc_buf_lsize(srcdb->db_buf),
arc_get_compression(srcdb->db_buf));
} else {
/* we won't get a compressed db back from dmu_buf_hold() */
ASSERT3U(arc_get_compression(srcdb->db_buf),
==, ZIO_COMPRESS_OFF);
abuf = arc_loan_buf(os->os_spa,
DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen);
DMU_OT_IS_METADATA(type), datalen);
}

ASSERT3U(datalen, ==, arc_buf_size(abuf));
Expand Down
34 changes: 28 additions & 6 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -2343,9 +2343,14 @@ free_guid_map_onexit(void *arg)
guid_map_entry_t *gmep;

while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
dsl_dataset_long_rele(gmep->gme_ds, gmep);
dsl_dataset_rele_flags(gmep->gme_ds,
(gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep);
ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT;

if (gmep->raw) {
gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE;
dsflags &= ~DS_HOLD_FLAG_DECRYPT;
}

dsl_dataset_disown(gmep->gme_ds, dsflags, gmep);
kmem_free(gmep, sizeof (guid_map_entry_t));
}
avl_destroy(ca);
Expand Down Expand Up @@ -4237,6 +4242,7 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
dsl_pool_t *dp;
dsl_dataset_t *snapds;
guid_map_entry_t *gmep;
objset_t *os;
ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
int err;

Expand All @@ -4246,13 +4252,29 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
if (err != 0)
return (err);
gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds);
err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds);
if (err == 0) {
gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
/*
* If this is a deduplicated raw send stream, we need
* to make sure that we can still read raw blocks from
* earlier datasets in the stream, so we set the
* os_raw_receive flag now.
*/
if (raw) {
err = dmu_objset_from_ds(snapds, &os);
if (err != 0) {
dsl_dataset_disown(snapds, dsflags, FTAG);
dsl_pool_rele(dp, FTAG);
kmem_free(gmep, sizeof (*gmep));
return (err);
}
os->os_raw_receive = B_TRUE;
}

gmep->raw = raw;
gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
gmep->gme_ds = snapds;
avl_add(guid_map, gmep);
dsl_dataset_long_hold(snapds, gmep);
} else {
kmem_free(gmep, sizeof (*gmep));
}
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy',
'send_encrypted_props', 'send_freeobjects', 'send_realloc_dnode_size',
'send_hole_birth']
'send_hole_birth', 'send-wDR_encrypted_zvol']
tags = ['functional', 'rsend']

[tests/functional/scrub_mirror]
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/rsend/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ dist_pkgdata_SCRIPTS = \
send-cpL_varied_recsize.ksh \
send_freeobjects.ksh \
send_realloc_dnode_size.ksh \
send_hole_birth.ksh
send_hole_birth.ksh \
send-wDR_encrypted_zvol.ksh

dist_pkgdata_DATA = \
rsend.cfg \
Expand Down
93 changes: 93 additions & 0 deletions tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2018 by Datto Inc. All rights reserved.
#

. $STF_SUITE/tests/functional/rsend/rsend.kshlib

#
# DESCRIPTION:
# Verify that zvols with dedup=on and encryption=on can be sent and received
# with a deduplicated raw send stream.
#
# STRATEGY:
# 1. Create a zvol with dedup and encryption on and put a filesystem on it
# 2. Copy a file into the zvol a few times and take a snapshot
# 3. Repeat step 2 a few times to create more snapshots
# 4. Send all snapshots in a recursive, raw, deduplicated send stream
# 5. Mount the received zvol and verify that all of the data there is correct
#

verify_runnable "both"

function cleanup
{
ismounted $recvmnt ext4 && log_must umount $recvmnt
ismounted $mntpnt ext4 && log_must umount $mntpnt
[[ -d $recvmnt ]] && log_must rm -rf $keyfile
[[ -d $mntpnt ]] && log_must rm -rf $keyfile
destroy_dataset $TESTPOOL/recv "-r"
destroy_dataset $TESTPOOL/$TESTVOL "-r"
[[ -f $keyfile ]] && log_must rm $keyfile
[[ -f $sendfile ]] && log_must rm $sendfile
}
log_onexit cleanup

log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams"

typeset keyfile=/$TESTPOOL/pkey
typeset snap_count=5
typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
typeset mntpnt=$TESTDIR/$TESTVOL
typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
typeset recvmnt=$TESTDIR/recvmnt
typeset sendfile=$TESTDIR/sendfile

log_must eval "echo 'password' > $keyfile"

log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \
-o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL
log_must block_device_wait

log_must eval "echo 'y' | newfs -t ext4 -v $zdev"
log_must mkdir -p $mntpnt
log_must mkdir -p $recvmnt
log_must mount $zdev $mntpnt

for ((i = 1; i <= $snap_count; i++)); do
log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1
for ((j = 0; j < 10; j++)); do
log_must cp $mntpnt/file $mntpnt/file$j
done

log_must sync
log_must zfs snap $TESTPOOL/$TESTVOL@snap$i
done

log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile"
log_must eval "zfs recv $TESTPOOL/recv < $sendfile"
log_must zfs load-key $TESTPOOL/recv
log_must block_device_wait

log_must mount $recvdev $recvmnt

md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}')
md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}')
[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2"

log_pass "zfs can receive raw, recursive, and deduplicated send streams"

0 comments on commit 149ce88

Please sign in to comment.