Skip to content

Commit

Permalink
DLPX-47082 panic in bpobj_space(): null pointer dereference
Browse files Browse the repository at this point in the history
  • Loading branch information
sdimitro authored and ahrens committed Feb 2, 2017
1 parent 0f676dc commit deb33f3
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 11 deletions.
4 changes: 2 additions & 2 deletions module/zfs/bpobj.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
*/

#include <sys/bpobj.h>
Expand Down Expand Up @@ -395,6 +395,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
return;
}

mutex_enter(&bpo->bpo_lock);
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
if (bpo->bpo_phys->bpo_subobjs == 0) {
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
Expand All @@ -405,7 +406,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);

mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
sizeof (subobj), &subobj, tx);
Expand Down
30 changes: 22 additions & 8 deletions module/zfs/dsl_deadlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
zap_cursor_t zc;
zap_attribute_t za;

ASSERT(MUTEX_HELD(&dl->dl_lock));

ASSERT(!dl->dl_oldfmt);
if (dl->dl_havetree)
return;
Expand Down Expand Up @@ -178,6 +180,7 @@ static void
dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
const blkptr_t *bp, dmu_tx_t *tx)
{
ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object ==
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
Expand All @@ -194,6 +197,7 @@ static void
dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
uint64_t obj, dmu_tx_t *tx)
{
ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object !=
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
Expand All @@ -218,15 +222,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
return;
}

mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);

dmu_buf_will_dirty(dl->dl_dbuf, tx);
mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used +=
bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
mutex_exit(&dl->dl_lock);

dle_tofind.dle_mintxg = bp->blk_birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
Expand All @@ -243,6 +246,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)

ASSERT3P(dle, !=, NULL);
dle_enqueue(dl, dle, bp, tx);
mutex_exit(&dl->dl_lock);
}

/*
Expand All @@ -258,16 +262,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;

dsl_deadlist_load_tree(dl);

dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = mintxg;

mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);

obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);

VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
mintxg, obj, tx));
mutex_exit(&dl->dl_lock);
}

/*
Expand All @@ -282,6 +289,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;

mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);

dle_tofind.dle_mintxg = mintxg;
Expand All @@ -295,6 +303,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
kmem_free(dle, sizeof (*dle));

VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
mutex_exit(&dl->dl_lock);
}

/*
Expand Down Expand Up @@ -338,6 +347,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
return (newobj);
}

mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);

for (dle = avl_first(&dl->dl_tree); dle;
Expand All @@ -351,6 +361,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
mutex_exit(&dl->dl_lock);
return (newobj);
}

Expand Down Expand Up @@ -428,18 +439,18 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
uint64_t used, comp, uncomp;
bpobj_t bpo;

ASSERT(MUTEX_HELD(&dl->dl_lock));

VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
bpobj_close(&bpo);

dsl_deadlist_load_tree(dl);

dmu_buf_will_dirty(dl->dl_dbuf, tx);
mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used += used;
dl->dl_phys->dl_comp += comp;
dl->dl_phys->dl_uncomp += uncomp;
mutex_exit(&dl->dl_lock);

dle_tofind.dle_mintxg = birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
Expand Down Expand Up @@ -479,6 +490,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
return;
}

mutex_enter(&dl->dl_lock);
for (zap_cursor_init(&zc, dl->dl_os, obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
Expand All @@ -493,6 +505,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
dmu_buf_will_dirty(bonus, tx);
bzero(dlp, sizeof (*dlp));
dmu_buf_rele(bonus, FTAG);
mutex_exit(&dl->dl_lock);
}

/*
Expand All @@ -507,6 +520,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
avl_index_t where;

ASSERT(!dl->dl_oldfmt);

mutex_enter(&dl->dl_lock);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
dsl_deadlist_load_tree(dl);

Expand All @@ -522,14 +537,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,

VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
mutex_enter(&dl->dl_lock);
ASSERT3U(dl->dl_phys->dl_used, >=, used);
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
dl->dl_phys->dl_used -= used;
dl->dl_phys->dl_comp -= comp;
dl->dl_phys->dl_uncomp -= uncomp;
mutex_exit(&dl->dl_lock);

VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
dle->dle_mintxg, tx));
Expand All @@ -540,4 +553,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
kmem_free(dle, sizeof (*dle));
dle = dle_next;
}
mutex_exit(&dl->dl_lock);
}
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ tests = ['rollback_001_pos', 'rollback_002_pos',
'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos',
'snapshot_015_pos', 'snapshot_017_pos']
'snapshot_015_pos', 'snapshot_017_pos', 'deadlist_lock']

# DISABLED:
# snapused_004_pos - https://github.com/zfsonlinux/zfs/issues/5513
Expand Down
104 changes: 104 additions & 0 deletions usr/src/test/zfs-tests/tests/functional/snapshot/deadlist_lock.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/ksh -p
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2016 by Delphix. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/snapshot/snapshot.cfg

#
# DESCRIPTION:
#
# This test ensures that the following race condition does not
# take place:
#
# 1] A sync thread inserts a new entry in the deadlist of a
# snapshot. The dle_bpobj at that entry currently is the
# empty bpobj (our sentinel), so we close it and we are
# about to reopen it. (see dle_enqueue())
#
# 2] At the same time a thread executing an administrative
# command that uses dsl_deadlist_space_range() is about
# to dereference that same bpobj that was just closed
# and therefore is NULL.
#
# 3] The sync thread loses the race and we dereference the
# NULL pointer in the kernel.
#
# STRATEGY:
#
# 1. Setup a folder and create a bunch of test files. Take a
# snapshot right after you create a new test file.
# 2. Start DTrace in the background to put a delay in the
# sync thread after it closes the empty bpobj and before
# it reopens it.
# 3. Start a process in the backgroud that runs zfs-destroy
# dry-runs in an infinite loop. The idea is to keep calling
# dsl_deadlist_space_range().
# 4. Go ahead and start removing the test files. This should
# start populating the deadlist of each snapshot with
# entries and go through the dle_enqueue() target code.
# 5. If the test passes, kill the process running on a loop
# and dtrace, and cleanup the dataset.
#

verify_runnable "both"


DLDS="dl_race"

function cleanup
{
log_must kill -9 $DLOOP_PID $DTRACE_PID
log_must zfs destroy -fR $TESTPOOL/$TESTFS/$DLDS
}

function setup
{
log_must zfs create $TESTPOOL/$TESTFS/$DLDS
for i in {1..50}; do
log_must mkfile 1m /$TESTDIR/$DLDS/dl_test_file$i
log_must zfs snapshot $TESTPOOL/$TESTFS/$DLDS@snap${i}
done
}

function destroy_nv_loop
{
while true; do
log_must zfs destroy -nv $TESTPOOL/$TESTFS/$DLDS@snap1%snap50
done
}

log_onexit cleanup

setup
log_must sync

log_must dtrace -qwn "fbt::bpobj_decr_empty:entry { chill(500000000); }" &
DTRACE_PID="$!"
sleep 1

destroy_nv_loop &
DLOOP_PID="$!"
sleep 1

for i in {1..50}; do
log_must rm /$TESTDIR/$DLDS/dl_test_file$i
done
log_must sync

log_pass "There should be no race condition when an administrative command" \
" attempts to read a deadlist's entries at the same time a that a sync" \
" thread is manipulating it."

0 comments on commit deb33f3

Please sign in to comment.