Skip to content

Commit

Permalink
Testing raidz_expand_001_pos failure
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyhutter committed Aug 7, 2024
1 parent c7f70a0 commit 836a672
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 24 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/scripts/qemu-6-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,14 @@ esac
cd /var/tmp
uname -a > uname.txt

# ONLY FOR TESTING DO NOT COMMIT
#
TAGS=$2/$3

# TAGS=raidz

# run functional testings
$TDIR/zfs-tests.sh -vK -s 3G -T $2/$3
$TDIR/zfs-tests.sh -vK -s 3G -T $TAGS
RV=$?

# we wont fail here, this will be done later
Expand Down
14 changes: 12 additions & 2 deletions .github/workflows/scripts/qemu-7-reports.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,26 @@ for i in `seq 1 $VMs`; do
echo "##[endgroup]"
done

RESPATH="/var/tmp/test_results"

# all tests without grouping:
MERGE="$BASE/.github/workflows/scripts/merge_summary.awk"
$MERGE vm*log.txt | $BASE/scripts/zfs-tests-color.sh
$MERGE vm*log.txt | $BASE/scripts/zfs-tests-color.sh | tee $RESPATH/summary.txt

RESPATH="/var/tmp/test_results"
for i in `seq 1 $VMs`; do
rsync -arL zfs@192.168.122.1$i:$RESPATH/current $RESPATH/vm$i || true
scp zfs@192.168.122.1$i:"/var/tmp/*.txt" $RESPATH/vm$i || true
done
cp -f /var/tmp/*.txt $RESPATH || true


# Save a list of all failed test logs for easy access
awk '/\[FAIL\]|\[KILLED\]/{ show=1; print; next; }; /\[SKIP\]|\[PASS\]/{ show=0; } show' \
$RESPATH/vm*/current/log >> $RESPATH/summary-failure-logs.txt

cp $RESPATH/summary.txt $RESPATH/summary-with-logs.txt
cat $RESPATH/summary-failure-logs.txt >> $RESPATH/summary-with-logs.txt

tar cf /tmp/qemu-$OS.tar -C $RESPATH -h . || true

echo "********************************************************************"
Expand Down
61 changes: 47 additions & 14 deletions .github/workflows/zfs-qemu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: zfs-qemu

on:
push:
pull_request:
# pull_request:

jobs:

Expand All @@ -13,29 +13,33 @@ jobs:
matrix:
# all:
# os: [almalinux8, almalinux9, archlinux, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24]
os: [almalinux8, almalinux9, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24]
# os: [almalinux8, almalinux9, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24]
# openzfs:
# os: [almalinux8, almalinux9, centos-stream9, fedora39, fedora40, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24]
# freebsd:
# os: [almalinux8, almalinux9, fedora39, fedora40, freebsd13, freebsd14, freebsd15, ubuntu20, ubuntu22, ubuntu24]
# os: [almalinux8, almalinux9, fedora39, fedora40, ubuntu22, ubuntu24]
os: [almalinux8]

# os: [freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15]
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: Setup SSH
run: |
mkdir -p $HOME/.ssh
echo "ConnectTimeout 1" >> $HOME/.ssh/config
echo "StrictHostKeyChecking no" >> $HOME/.ssh/config
echo "${{ secrets.AUTHORIZED_KEYS }}" >> $HOME/.ssh/authorized_keys
echo "${{ secrets.SSH_KEY }}" > $HOME/.ssh/id_ed25519
echo "${{ secrets.KNOWN_HOSTS }}" >> $HOME/.ssh/known_hosts
chmod 600 $HOME/.ssh/id_ed25519
R=`shuf -n 1 -i 10000-60000`
echo "Port $R"
ssh -x -N -C -f -R $R:127.0.0.1:22 mcmilk@${{ secrets.SOME_HOST }}
# - name: Setup SSH
# run: |
# mkdir -p $HOME/.ssh
# echo "ConnectTimeout 1" >> $HOME/.ssh/config
# echo "StrictHostKeyChecking no" >> $HOME/.ssh/config
# echo "${{ secrets.AUTHORIZED_KEYS }}" >> $HOME/.ssh/authorized_keys
# echo "${{ secrets.SSH_KEY }}" > $HOME/.ssh/id_ed25519
# echo "${{ secrets.KNOWN_HOSTS }}" >> $HOME/.ssh/known_hosts
# chmod 600 $HOME/.ssh/id_ed25519
# R=`shuf -n 1 -i 10000-60000`
# echo "Port $R"
# ssh -x -N -C -f -R $R:127.0.0.1:22 mcmilk@${{ secrets.SOME_HOST }}

- name: Setup QEMU
timeout-minutes: 10
Expand Down Expand Up @@ -87,12 +91,41 @@ jobs:
run: .github/workflows/scripts/qemu-7-reports.sh

- uses: actions/upload-artifact@v4
id: artifact-upload-step
if: always()
with:
name: Logs-functional-${{ matrix.os }}
path: /tmp/qemu-${{ matrix.os }}.tar
if-no-files-found: ignore

- name: Test Summary
if: success() || failure()
run: |
cat /var/tmp/test_results/summary.txt
echo ""
echo "Full logs for download:"
echo '${{ steps.artifact-upload-step.outputs.artifact-url }}'
echo "File listing:"
ls -l /var/tmp/test_results/
# Did we have a test failure?
if grep -vq 0 /var/tmp/test_results/vm*/exitcode.txt ; then
echo "One or more tests failed"
cat /var/tmp/test_results/summary-failure-logs.txt
# On test failures, the github test results page will autoscroll to the
# bottom of summary-with-logs.txt. The first thing we want to see
# is the summary page of failures so print another copy of it at
# the bottom of the "page" for easy access.
echo ""
cat /var/tmp/test_results/summary.txt
echo ""
echo "Full logs for download:"
echo '${{ steps.artifact-upload-step.outputs.artifact-url }}'
false
else
true
fi
cleanup:
if: always()
name: Cleanup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,16 @@ function do_vol_test

# Ubuntu 20.04 wants a sync here
log_must sync

# Not sure if this 'udevadm settle' is needed, but we once saw this
# failure and want to attempt to mitigate it:
#
# ERROR: zfs destroy testpool/testvol1 exited 1
# cannot destroy 'testpool/testvol1': dataset is busy
#
if is_linux; then
udevadm settle
fi

log_must zfs destroy $vol
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ for raid_type in "draid2:3d:6c:1s" "raidz2"; do
log_mustnot eval "zpool status -e $TESTPOOL2 | grep ONLINE"

# Check no ONLINE slow vdevs are show. Then mark IOs greater than
# 10ms slow, delay IOs 20ms to vdev6, check slow IOs.
# 160ms slow, delay IOs 160ms to vdev6, check slow IOs.
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev6 "ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep ONLINE"

log_must set_tunable64 ZIO_SLOW_IO_MS 10
log_must zinject -d $TESTDIR/vdev6 -D20:100 $TESTPOOL2
log_must set_tunable64 ZIO_SLOW_IO_MS 160
log_must zinject -d $TESTDIR/vdev6 -D320:100 $TESTPOOL2
log_must mkfile 1048576 /$TESTPOOL2/testfile
sync_pool $TESTPOOL2
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
Expand All @@ -91,6 +91,7 @@ for raid_type in "draid2:3d:6c:1s" "raidz2"; do
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev1 "ONLINE"
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev2 "ONLINE"
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev3 "ONLINE"

log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev1 | grep ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev2 | grep ONLINE"
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev3 | grep ONLINE"
Expand Down
5 changes: 4 additions & 1 deletion tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ for arg in ${args[*]}; do
fi
log_must touch $TESTFILE
typeset -i crtime1=$(stat_crtime $TESTFILE)
if (( crtime1 != crtime )); then

# On slow test machines, there's a slight chance crtime1 rolls over
# to the next second. If that happens count it as ok.
if ! within_tolerance $crtime1 $crtime 1 ; then
log_fail "touch modified crtime ($crtime1 != $crtime)"
fi
done
Expand Down
21 changes: 18 additions & 3 deletions tests/zfs-tests/tests/functional/raidz/raidz_expand_001_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ function test_scrub # <pool> <parity> <dir>
typeset nparity=$2
typeset dir=$3
typeset combrec=$4

log_note "Begin: $1, $2, $3, $4"
reflow_size=$(get_pool_prop allocated $pool)
randbyte=$(( ((RANDOM<<15) + RANDOM) % $reflow_size ))
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES $randbyte
Expand All @@ -151,10 +151,18 @@ function test_scrub # <pool> <parity> <dir>
dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
bs=1M seek=4 count=$(($dev_size_mb-4))
done
log_must sync

log_must zpool import -o cachefile=none -d $dir $pool

if is_pool_scrubbing $pool ; then
wait_scrubbed $pool
fi
log_note "=== About to scrub1 ==="
log_note "$(zpool status -vs)"
log_must zpool scrub -w $pool
log_note "=== After scrub1 ==="
log_note "$(zpool status -vs)"

log_must zpool clear $pool
log_must zpool export $pool

Expand All @@ -163,10 +171,17 @@ function test_scrub # <pool> <parity> <dir>
dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
bs=1M seek=4 count=$(($dev_size_mb-4))
done
log_must sync

log_must zpool import -o cachefile=none -d $dir $pool

if is_pool_scrubbing $pool ; then
wait_scrubbed $pool
fi
log_note "=== About to scrub2 ==="
log_note "$(zpool status -vs)"
log_must zpool scrub -w $pool
log_note "=== After scrub2 ==="
log_note "$(zpool status -vs)"

log_must check_pool_status $pool "errors" "No known data errors"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ for disk in ${disks[$(($nparity+2))..$devs]}; do
log_fail "pool $pool not expanded"
fi

# It's possible the pool could be auto scrubbing here. If so, wait.
if is_pool_scrubbing $pool ; then
wait_scrubbed $pool
fi
verify_pool $pool

pool_size=$expand_size
Expand Down

0 comments on commit 836a672

Please sign in to comment.