From 149ce888bb14a220036ca3e5b301e3b56451bd71 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Mon, 20 Aug 2018 14:03:56 -0400 Subject: [PATCH] Fix issues with raw receive_write_byref() This patch fixes 2 issues with raw, deduplicated send streams. The first is that datasets who had been completely received earlier in the stream were not still marked as raw receives. This caused problems when newly received datasets attempted to fetch raw data from these datasets without this flag set. The second problem was that the arc freeze checksum code was not consistent about which locks needed to be held while performing its asserts. The proper locking needed to run these asserts is actually fairly nuanced, since the asserts touch the linked list of buffers (requiring the header lock), the arc_state (requiring the b_evict_lock), and the b_freeze_cksum (requiring the b_freeze_lock). This seems like a large performance sacrifice and a lot of unneeded complexity to verify that this relatively small debug feature is working as intended, so this patch simply removes these asserts instead. Reviewed by: Matthew Ahrens Reviewed-by: Brian Behlendorf Signed-off-by: Tom Caputi Closes #7701 --- module/zfs/arc.c | 45 +++------ module/zfs/dbuf.c | 1 - module/zfs/dmu.c | 9 +- module/zfs/dmu_send.c | 34 +++++-- tests/runfiles/linux.run | 2 +- .../tests/functional/rsend/Makefile.am | 3 +- .../rsend/send-wDR_encrypted_zvol.ksh | 93 +++++++++++++++++++ 7 files changed, 144 insertions(+), 43 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 7f2929c17e1a..f9436d7c471b 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1561,6 +1561,9 @@ arc_cksum_free(arc_buf_hdr_t *hdr) static boolean_t arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr) { + ASSERT(hdr->b_l1hdr.b_state == arc_anon || + MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); + for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) { if (!ARC_BUF_COMPRESSED(b)) { return (B_TRUE); @@ -1584,15 +1587,13 @@ arc_cksum_verify(arc_buf_t *buf) if (!(zfs_flags & ZFS_DEBUG_MODIFY)) return; - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || - arc_hdr_has_uncompressed_buf(hdr)); + if (ARC_BUF_COMPRESSED(buf)) return; - } ASSERT(HDR_HAS_L1HDR(hdr)); mutex_enter(&hdr->b_l1hdr.b_freeze_lock); + if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) { mutex_exit(&hdr->b_l1hdr.b_freeze_lock); return; @@ -1650,11 +1651,7 @@ arc_cksum_compute(arc_buf_t *buf) ASSERT(HDR_HAS_L1HDR(hdr)); mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); - if (hdr->b_l1hdr.b_freeze_cksum != NULL) { - ASSERT(arc_hdr_has_uncompressed_buf(hdr)); - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); - return; - } else if (ARC_BUF_COMPRESSED(buf)) { + if (hdr->b_l1hdr.b_freeze_cksum != NULL || ARC_BUF_COMPRESSED(buf)) { mutex_exit(&hdr->b_l1hdr.b_freeze_lock); return; } @@ -1746,14 +1743,10 @@ arc_buf_thaw(arc_buf_t *buf) arc_cksum_verify(buf); /* - * Compressed buffers do not manipulate the b_freeze_cksum or - * allocate b_thawed. + * Compressed buffers do not manipulate the b_freeze_cksum. */ - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || - arc_hdr_has_uncompressed_buf(hdr)); + if (ARC_BUF_COMPRESSED(buf)) return; - } ASSERT(HDR_HAS_L1HDR(hdr)); arc_cksum_free(hdr); @@ -1763,26 +1756,14 @@ arc_buf_thaw(arc_buf_t *buf) void arc_buf_freeze(arc_buf_t *buf) { - arc_buf_hdr_t *hdr = buf->b_hdr; - kmutex_t *hash_lock; - if (!(zfs_flags & ZFS_DEBUG_MODIFY)) return; - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || - arc_hdr_has_uncompressed_buf(hdr)); + if (ARC_BUF_COMPRESSED(buf)) return; - } - - hash_lock = HDR_LOCK(hdr); - mutex_enter(hash_lock); - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL || - hdr->b_l1hdr.b_state == arc_anon); + ASSERT(HDR_HAS_L1HDR(buf->b_hdr)); arc_cksum_compute(buf); - mutex_exit(hash_lock); } /* @@ -1901,7 +1882,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) void *tmpbuf = NULL; abd_t *abd = hdr->b_l1hdr.b_pabd; - ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); ASSERT(HDR_AUTHENTICATED(hdr)); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); @@ -1971,7 +1952,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb) boolean_t no_crypt = B_FALSE; boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); - ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); ASSERT(HDR_ENCRYPTED(hdr)); arc_hdr_alloc_abd(hdr, B_FALSE); @@ -2090,7 +2071,7 @@ arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock) ASSERT(HDR_ENCRYPTED(hdr)); ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); - ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data, diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index f0bee6c2bb59..1af0d8010807 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1267,7 +1267,6 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags) !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype)))) err = 0; - DB_DNODE_EXIT(db); return (err); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 9d00098c8b0d..88b574d4aa04 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1622,6 +1622,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, dmu_buf_t *dst_handle; dmu_buf_impl_t *dstdb; dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle; + dmu_object_type_t type; arc_buf_t *abuf; uint64_t datalen; boolean_t byteorder; @@ -1637,11 +1638,15 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, dstdb = (dmu_buf_impl_t *)dst_handle; datalen = arc_buf_size(srcdb->db_buf); + DB_DNODE_ENTER(dstdb); + type = DB_DNODE(dstdb)->dn_type; + DB_DNODE_EXIT(dstdb); + /* allocated an arc buffer that matches the type of srcdb->db_buf */ if (arc_is_encrypted(srcdb->db_buf)) { arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac); abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os), - byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type, + byteorder, salt, iv, mac, type, datalen, arc_buf_lsize(srcdb->db_buf), arc_get_compression(srcdb->db_buf)); } else { @@ -1649,7 +1654,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, ASSERT3U(arc_get_compression(srcdb->db_buf), ==, ZIO_COMPRESS_OFF); abuf = arc_loan_buf(os->os_spa, - DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen); + DMU_OT_IS_METADATA(type), datalen); } ASSERT3U(datalen, ==, arc_buf_size(abuf)); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 61d4ebd4ff7b..1f4d3a1048b8 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -2343,9 +2343,14 @@ free_guid_map_onexit(void *arg) guid_map_entry_t *gmep; while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { - dsl_dataset_long_rele(gmep->gme_ds, gmep); - dsl_dataset_rele_flags(gmep->gme_ds, - (gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep); + ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT; + + if (gmep->raw) { + gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE; + dsflags &= ~DS_HOLD_FLAG_DECRYPT; + } + + dsl_dataset_disown(gmep->gme_ds, dsflags, gmep); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); @@ -4237,6 +4242,7 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj, dsl_pool_t *dp; dsl_dataset_t *snapds; guid_map_entry_t *gmep; + objset_t *os; ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT; int err; @@ -4246,13 +4252,29 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj, if (err != 0) return (err); gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); - err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds); + err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds); if (err == 0) { - gmep->guid = dsl_dataset_phys(snapds)->ds_guid; + /* + * If this is a deduplicated raw send stream, we need + * to make sure that we can still read raw blocks from + * earlier datasets in the stream, so we set the + * os_raw_receive flag now. + */ + if (raw) { + err = dmu_objset_from_ds(snapds, &os); + if (err != 0) { + dsl_dataset_disown(snapds, dsflags, FTAG); + dsl_pool_rele(dp, FTAG); + kmem_free(gmep, sizeof (*gmep)); + return (err); + } + os->os_raw_receive = B_TRUE; + } + gmep->raw = raw; + gmep->guid = dsl_dataset_phys(snapds)->ds_guid; gmep->gme_ds = snapds; avl_add(guid_map, gmep); - dsl_dataset_long_hold(snapds, gmep); } else { kmem_free(gmep, sizeof (*gmep)); } diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 445463cf8d19..844a02caa86f 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -745,7 +745,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy', 'send_encrypted_props', 'send_freeobjects', 'send_realloc_dnode_size', - 'send_hole_birth'] + 'send_hole_birth', 'send-wDR_encrypted_zvol'] tags = ['functional', 'rsend'] [tests/functional/scrub_mirror] diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am index d65a9144e589..316bcb4e6774 100644 --- a/tests/zfs-tests/tests/functional/rsend/Makefile.am +++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am @@ -41,7 +41,8 @@ dist_pkgdata_SCRIPTS = \ send-cpL_varied_recsize.ksh \ send_freeobjects.ksh \ send_realloc_dnode_size.ksh \ - send_hole_birth.ksh + send_hole_birth.ksh \ + send-wDR_encrypted_zvol.ksh dist_pkgdata_DATA = \ rsend.cfg \ diff --git a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh new file mode 100755 index 000000000000..49b846e9c332 --- /dev/null +++ b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# DESCRIPTION: +# Verify that zvols with dedup=on and encryption=on can be sent and received +# with a deduplicated raw send stream. +# +# STRATEGY: +# 1. Create a zvol with dedup and encryption on and put a filesystem on it +# 2. Copy a file into the zvol a few times and take a snapshot +# 3. Repeat step 2 a few times to create more snapshots +# 4. Send all snapshots in a recursive, raw, deduplicated send stream +# 5. Mount the received zvol and verify that all of the data there is correct +# + +verify_runnable "both" + +function cleanup +{ + ismounted $recvmnt ext4 && log_must umount $recvmnt + ismounted $mntpnt ext4 && log_must umount $mntpnt + [[ -d $recvmnt ]] && log_must rm -rf $keyfile + [[ -d $mntpnt ]] && log_must rm -rf $keyfile + destroy_dataset $TESTPOOL/recv "-r" + destroy_dataset $TESTPOOL/$TESTVOL "-r" + [[ -f $keyfile ]] && log_must rm $keyfile + [[ -f $sendfile ]] && log_must rm $sendfile +} +log_onexit cleanup + +log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams" + +typeset keyfile=/$TESTPOOL/pkey +typeset snap_count=5 +typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL +typeset mntpnt=$TESTDIR/$TESTVOL +typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv +typeset recvmnt=$TESTDIR/recvmnt +typeset sendfile=$TESTDIR/sendfile + +log_must eval "echo 'password' > $keyfile" + +log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL +log_must block_device_wait + +log_must eval "echo 'y' | newfs -t ext4 -v $zdev" +log_must mkdir -p $mntpnt +log_must mkdir -p $recvmnt +log_must mount $zdev $mntpnt + +for ((i = 1; i <= $snap_count; i++)); do + log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1 + for ((j = 0; j < 10; j++)); do + log_must cp $mntpnt/file $mntpnt/file$j + done + + log_must sync + log_must zfs snap $TESTPOOL/$TESTVOL@snap$i +done + +log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile" +log_must eval "zfs recv $TESTPOOL/recv < $sendfile" +log_must zfs load-key $TESTPOOL/recv +log_must block_device_wait + +log_must mount $recvdev $recvmnt + +md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}') +md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}') +[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2" + +log_pass "zfs can receive raw, recursive, and deduplicated send streams"