Skip to content

Commit

Permalink
Fix issues with raw receive_write_byref()
Browse files Browse the repository at this point in the history
This patch fixes 2 issues with raw, deduplicated send streams. The
first is that datasets who had been completely received earlier in
the stream were not still marked as raw receives. This caused
problems when newly received datasets attempted to fetch raw data
from these datasets without this flag set.

The second problem was that the arc freeze checksum code was not
consistent about which locks needed to be held while performing
its asserts. The code now guarantees that the hdr lock is held
when iterating through the linked list of buffers and the
b_freeze_lock is held when attempting to read or modify the
b_freeze_cksum. This is not strictly a problem with the write_byref
code, but it seems to be the only consistent code path to trigger
the issue.

Signed-off-by: Tom Caputi <tcaputi@datto.com>
  • Loading branch information
Tom Caputi committed Jul 11, 2018
1 parent 00c405b commit 855fd78
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 19 deletions.
36 changes: 27 additions & 9 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1551,6 +1551,9 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
static boolean_t
arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
{
ASSERT(hdr->b_l1hdr.b_state == arc_anon ||
HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));

for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) {
if (!ARC_BUF_COMPRESSED(b)) {
return (B_TRUE);
Expand All @@ -1574,15 +1577,16 @@ arc_cksum_verify(arc_buf_t *buf)
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;

mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
if (ARC_BUF_COMPRESSED(buf)) {
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
}

ASSERT(HDR_HAS_L1HDR(hdr));

mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) {
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
Expand Down Expand Up @@ -1729,45 +1733,54 @@ void
arc_buf_thaw(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock = HDR_LOCK(hdr);

if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;

mutex_enter(hash_lock);
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
ASSERT(!HDR_IO_IN_PROGRESS(hdr));

arc_cksum_verify(buf);

/*
* Compressed buffers do not manipulate the b_freeze_cksum or
* allocate b_thawed.
* Compressed buffers do not manipulate the b_freeze_cksum.
*/
if (ARC_BUF_COMPRESSED(buf)) {
mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
mutex_exit(hash_lock);
return;
}

ASSERT(HDR_HAS_L1HDR(hdr));
arc_cksum_free(hdr);
mutex_exit(hash_lock);
arc_buf_unwatch(buf);
}

void
arc_buf_freeze(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock;
kmutex_t *hash_lock = HDR_LOCK(hdr);

if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;

mutex_enter(hash_lock);
if (ARC_BUF_COMPRESSED(buf)) {
mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
arc_hdr_has_uncompressed_buf(hdr));
mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
mutex_exit(hash_lock);
return;
}

hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);

ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL ||
hdr->b_l1hdr.b_state == arc_anon);
Expand Down Expand Up @@ -2179,11 +2192,12 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
if (hash_lock != NULL)
mutex_enter(hash_lock);
arc_buf_untransform_in_place(buf, hash_lock);
/* Compute the hdr's checksum if necessary */
arc_cksum_compute(buf);
if (hash_lock != NULL)
mutex_exit(hash_lock);

/* Compute the hdr's checksum if necessary */
arc_cksum_compute(buf);

}

return (0);
Expand Down Expand Up @@ -2274,7 +2288,11 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
}

/* Compute the hdr's checksum if necessary */
if (hash_lock != NULL)
mutex_enter(hash_lock);
arc_cksum_compute(buf);
if (hash_lock != NULL)
mutex_exit(hash_lock);

return (0);
}
Expand Down
1 change: 0 additions & 1 deletion module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,6 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
!DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
err = 0;


DB_DNODE_EXIT(db);

return (err);
Expand Down
9 changes: 7 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dmu_buf_t *dst_handle;
dmu_buf_impl_t *dstdb;
dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
dmu_object_type_t type;
arc_buf_t *abuf;
uint64_t datalen;
boolean_t byteorder;
Expand All @@ -1636,19 +1637,23 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dstdb = (dmu_buf_impl_t *)dst_handle;
datalen = arc_buf_size(srcdb->db_buf);

DB_DNODE_ENTER(dstdb);
type = DB_DNODE(dstdb)->dn_type;
DB_DNODE_EXIT(dstdb);

/* allocated an arc buffer that matches the type of srcdb->db_buf */
if (arc_is_encrypted(srcdb->db_buf)) {
arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type,
byteorder, salt, iv, mac, type,
datalen, arc_buf_lsize(srcdb->db_buf),
arc_get_compression(srcdb->db_buf));
} else {
/* we won't get a compressed db back from dmu_buf_hold() */
ASSERT3U(arc_get_compression(srcdb->db_buf),
==, ZIO_COMPRESS_OFF);
abuf = arc_loan_buf(os->os_spa,
DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen);
DMU_OT_IS_METADATA(type), datalen);
}

ASSERT3U(datalen, ==, arc_buf_size(abuf));
Expand Down
9 changes: 5 additions & 4 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -2252,8 +2252,7 @@ free_guid_map_onexit(void *arg)
guid_map_entry_t *gmep;

while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
dsl_dataset_long_rele(gmep->gme_ds, gmep);
dsl_dataset_rele_flags(gmep->gme_ds,
dsl_dataset_disown(gmep->gme_ds,
(gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep);
kmem_free(gmep, sizeof (guid_map_entry_t));
}
Expand Down Expand Up @@ -2846,6 +2845,9 @@ receive_write_byref(struct receive_writer_arg *rwa,
}
if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
return (SET_ERROR(EINVAL));

if (gmep->raw)
ref_os->os_raw_receive = B_TRUE;
} else {
ref_os = rwa->os;
}
Expand Down Expand Up @@ -4159,13 +4161,12 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
if (err != 0)
return (err);
gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds);
err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds);
if (err == 0) {
gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
gmep->raw = raw;
gmep->gme_ds = snapds;
avl_add(guid_map, gmep);
dsl_dataset_long_hold(snapds, gmep);
} else {
kmem_free(gmep, sizeof (*gmep));
}
Expand Down
4 changes: 2 additions & 2 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg',
'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
'arc_summary_001_pos', 'arc_summary_002_neg',
'arc_summary_001_pos', 'arc_summary_002_neg',
'arc_summary3_001_pos', 'dbufstat_001_pos']
user =
tags = ['functional', 'cli_user', 'misc']
Expand Down Expand Up @@ -743,7 +743,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD',
'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy',
'send_freeobjects', 'send_realloc_dnode_size']
'send_freeobjects', 'send_realloc_dnode_size', 'send-wDR_encrypted_zvol']
tags = ['functional', 'rsend']

[tests/functional/scrub_mirror]
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/rsend/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ dist_pkgdata_SCRIPTS = \
send-c_zstreamdump.ksh \
send-cpL_varied_recsize.ksh \
send_freeobjects.ksh \
send_realloc_dnode_size.ksh
send_realloc_dnode_size.ksh \
send-wDR_encrypted_zvol.ksh

dist_pkgdata_DATA = \
rsend.cfg \
Expand Down
95 changes: 95 additions & 0 deletions tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2018 by Datto Inc. All rights reserved.
#

. $STF_SUITE/tests/functional/rsend/rsend.kshlib

#
# DESCRIPTION:
# Verify that zvols with dedup=on and encryption=on can be sent and received
# with a deduplicated raw send stream.
#
# STRATEGY:
# 1. Create a zvol with dedup and encryption on and put a filesystem on it
# 2. Copy a file into the zvol a few times and take a snapshot
# 3. Repeat step 2 a few times to create more snapshots
# 4. Send all snapshots in a recursive, raw, deduplicated send stream
# 5. Mount the received zvol and verify that all of the data there is correct
#

verify_runnable "both"

function cleanup
{
ismounted $recvmnt ext4 && log_must umount $recvmnt
ismounted $mntpnt ext4 && log_must umount $mntpnt
[[ -d $recvmnt ]] && log_must rm -rf $keyfile
[[ -d $mntpnt ]] && log_must rm -rf $keyfile
datasetexists $TESTPOOL/recv && \
log_must zfs destroy -r $TESTPOOL/recv
datasetexists $TESTPOOL/$TESTVOL && \
log_must zfs destroy -r $TESTPOOL/$TESTVOL
[[ -f $keyfile ]] && log_must rm $keyfile
[[ -f $sendfile ]] && log_must rm $sendfile
}
log_onexit cleanup

log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams"

typeset keyfile=/$TESTPOOL/pkey
typeset snap_count=5
typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
typeset mntpnt=$TESTDIR/$TESTVOL
typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
typeset recvmnt=$TESTDIR/recvmnt
typeset sendfile=$TESTDIR/sendfile

log_must eval "echo 'password' > $keyfile"

log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \
-o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL
log_must block_device_wait

log_must eval "echo 'y' | newfs -t ext4 -v $zdev"
log_must mkdir -p $mntpnt
log_must mkdir -p $recvmnt
log_must mount $zdev $mntpnt

for ((i = 1; i <= $snap_count; i++)); do
log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1
for ((j = 0; j < 10; j++)); do
log_must cp $mntpnt/file $mntpnt/file$j
done

log_must sync
log_must zfs snap $TESTPOOL/$TESTVOL@snap$i
done

log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile"
log_must eval "zfs recv $TESTPOOL/recv < $sendfile"
log_must zfs load-key $TESTPOOL/recv
log_must block_device_wait

log_must mount $recvdev $recvmnt

md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}')
md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}')
[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2"

log_pass "zfs can receive raw, recursive, and deduplicated send streams"

0 comments on commit 855fd78

Please sign in to comment.