Skip to content

Commit

Permalink
refresh-sample-testdata refactor (chainguard-dev#450)
Browse files Browse the repository at this point in the history
  • Loading branch information
tstromberg authored Sep 16, 2024
1 parent 1c70ba5 commit 5bbba0e
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 95 deletions.
19 changes: 9 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,17 @@ fix: $(FIXERS)

# END: lint-install ../bincapz

# Sample corpus checkout, pinned to an exact commit. The pin is part of the
# target's file name, so changing it yields a different target path.
SAMPLES_REPO=https://github.com/chainguard-dev/bincapz-samples.git
SAMPLES_HASH=bdcb8c2e9bf557a0abe3e2b0144f437d456299b7
OUT_DIR=out/samples-$(SAMPLES_HASH).tmp
out/samples-$(SAMPLES_HASH):
# "?=" only assigns when the variable is not already set, so both values can
# be overridden from the environment or the make command line.
SAMPLES_REPO ?= chainguard-dev/bincapz-samples
SAMPLES_COMMIT ?= bdcb8c2e9bf557a0abe3e2b0144f437d456299b7
OUT_DIR=out/samples-$(SAMPLES_COMMIT).tmp
out/samples-$(SAMPLES_COMMIT):
# Clone and unpack into the scratch ".tmp" directory; it is renamed to the
# real target path only in the final step of the recipe.
mkdir -p out
git clone $(SAMPLES_REPO) $(OUT_DIR)
git -C $(OUT_DIR) checkout $(SAMPLES_HASH)
git clone https://github.com/$(SAMPLES_REPO).git $(OUT_DIR)
git -C $(OUT_DIR) checkout $(SAMPLES_COMMIT)
# Unpack every .xz archive in place (-execdir runs tar from the directory
# that contains the archive).
find $(OUT_DIR) -name "*.xz" -execdir tar xJvf "{}" \;
# $(basename ...) is Make's function, not the shell utility: it strips the
# ".tmp" suffix from OUT_DIR, which gives the rule's target path.
mv $(OUT_DIR) $(basename $(OUT_DIR))

# Overlay the contents of test_data/ onto the unpacked samples checkout.
prepare-samples: out/samples-$(SAMPLES_HASH)
prepare-samples: out/samples-$(SAMPLES_COMMIT)
cp -a test_data/. $(basename $(OUT_DIR))

.PHONY: test
Expand Down Expand Up @@ -124,9 +124,8 @@ update-third-party:
./third_party/yara/update.sh

# out/bincapz is declared phony as well, so Make never treats an existing
# out/bincapz file as up to date.
.PHONY: refresh-sample-testdata out/bincapz
refresh-sample-testdata: clone-samples out/bincapz
cp ./test_data/refresh-testdata.sh samples/
./out/samples/refresh-testdata.sh ./out/bincapz
# Runs the refresh script with two arguments: the bincapz binary and the
# pinned samples checkout.
refresh-sample-testdata: out/samples-$(SAMPLES_COMMIT) out/bincapz
./test_data/refresh-testdata.sh ./out/bincapz out/samples-$(SAMPLES_COMMIT)

ARCH ?= $(shell uname -m)
CRANE_VERSION=v0.20.2
Expand Down
184 changes: 99 additions & 85 deletions test_data/refresh-testdata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,110 +2,124 @@
# refresh testdata with latest bincapz
#
# usage:
# ./refresh-testdata.sh </path/to/bincapz>
# ./refresh-testdata.sh </path/to/bincapz> </path/to/samples>
#
# NOTE: This is slow to run, so for small changes you are better
# off manually updating a single test file.

set -ux -o pipefail
set -eu -o pipefail

# Value handed to "xargs -P" when the queue is processed; defaults to 8 unless
# MAX_PROCS is already set in the environment.
MAX_PROCS=${MAX_PROCS:=8}
# First argument: the bincapz binary, resolved to an absolute path. The script
# changes directory below, so a relative path would stop resolving.
readonly bincapz=$(realpath $1)
readonly root_dir=$(dirname $0)
cd "${root_dir}"
# Second argument: the samples directory, likewise resolved to an absolute path.
readonly samples=$(realpath $2)

# root_dir becomes the parent of the directory that contains this script;
# test_data is the "test_data" directory directly beneath it.
cd "$(dirname $0)"
cd ..
readonly root_dir=$(pwd)
readonly test_data="${root_dir}/test_data"

if [[ -z "${bincapz}" ]]; then
echo "must pass location of bincapz"
exit 1
echo "must pass location of bincapz"
exit 1
fi

if [[ ! -x "${bincapz}" ]]; then
echo "bincapz at ${bincapz} is not executable"
exit 1
echo "bincapz at ${bincapz} is not executable"
exit 1
fi

# OCI edge case
${bincapz} --format=simple \
--min-risk any \
--min-file-risk any \
-o ../pkg/action/testdata/scan_oci \
../pkg/action/testdata/static.tar.xz; sed -i.bak 's|\.\.\/pkg\/action\/||g' ../pkg/action/testdata/scan_oci && rm ../pkg/action/testdata/scan_oci.bak &

# diffs don't follow an easy rule
${bincapz} --format=markdown \
-o ../test_data/macOS/2023.3CX/libffmpeg.dirty.mdiff \
diff \
macOS/2023.3CX/libffmpeg.dylib \
macOS/2023.3CX/libffmpeg.dirty.dylib &

${bincapz} --format=markdown \
-o ../test_data/macOS/clean/ls.mdiff \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &

${bincapz} --format=simple \
--min-level 2 \
--min-file-level 2 \
-o ../test_data/macOS/clean/ls.sdiff.level_2 \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &

${bincapz} --format=simple \
--min-level 1 \
--min-file-level 2 \
-o ../test_data/macOS/clean/ls.sdiff.trigger_2 \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &

${bincapz} --format=simple \
--min-level 1 \
--min-file-level 3 \
-o ../test_data/macOS/clean/ls.sdiff.trigger_3 \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &
# Queue file: one complete shell command per line. The queue is executed at
# the end of the script.
# Assign first and mark readonly afterwards: "readonly var=$(cmd)" returns the
# exit status of readonly itself, which would hide a mktemp failure.
qscript=$(mktemp)
readonly qscript

# addq appends its arguments, joined by single spaces, as one line of the
# queue file.
# Quoting is not preserved: each line is parsed again by a shell when the
# queue is run, so arguments must not contain whitespace or shell
# metacharacters.
function addq() {
  # printf writes the line verbatim, independent of echo's option handling.
  printf '%s\n' "$*" >>"${qscript}"
}

# OCI edge case
cd "${root_dir}/pkg/action"
echo "regenerating test data, max_procs=${MAX_PROCS} ..."
${bincapz} --format=simple \
-o ../test_data/linux/2024.sbcl.market/sbcl.sdiff \
diff \
linux/2024.sbcl.market/sbcl.clean \
linux/2024.sbcl.market/sbcl.dirty &
--min-risk any \
--min-file-risk any \
-o testdata/scan_oci \
analyze testdata/static.tar.xz

${bincapz} --format=simple \
-o ../test_data/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff \
diff \
linux/2023.FreeDownloadManager/freedownloadmanager_clear_postinst \
linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst &
cd "${samples}"

${bincapz} --format=simple \
-o ../test_data/linux/clean/aws-c-io/aws-c-io.sdiff \
diff \
linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json \
linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json &
wait

for f in $(find * -name "*.simple"); do
prog=$(echo ${f} | sed s/\.simple$//g)
if [[ -f "${prog}" ]]; then
${bincapz} --format=simple -o "../test_data/${f}" scan "${prog}" &
fi
# Diff expectations don't follow an easy naming rule (unlike the per-file
# outputs discovered with find further down), so each one is queued explicitly.
# addq only records the command; nothing runs until the queue is processed at
# the end of the script. Input paths are relative to the current directory;
# outputs are written under ${test_data}.
addq ${bincapz} --format=markdown \
-o "${test_data}/macOS/2023.3CX/libffmpeg.dirty.mdiff" \
diff \
macOS/2023.3CX/libffmpeg.dylib \
macOS/2023.3CX/libffmpeg.dirty.dylib

addq ${bincapz} --format=markdown \
-o "${test_data}/macOS/clean/ls.mdiff" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
--min-level 2 \
--min-file-level 2 \
-o "${test_data}/macOS/clean/ls.sdiff.level_2" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
--min-level 1 \
--min-file-level 2 \
-o "${test_data}/macOS/clean/ls.sdiff.trigger_2" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
--min-level 1 \
--min-file-level 3 \
-o "${test_data}/macOS/clean/ls.sdiff.trigger_3" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
-o "${test_data}/linux/2024.sbcl.market/sbcl.sdiff" \
diff \
linux/2024.sbcl.market/sbcl.clean \
linux/2024.sbcl.market/sbcl.dirty

addq ${bincapz} --format=simple \
-o "${test_data}/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff" \
diff \
linux/2023.FreeDownloadManager/freedownloadmanager_clear_postinst \
linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst

addq ${bincapz} --format=simple \
-o "${test_data}/linux/clean/aws-c-io/aws-c-io.sdiff" \
diff \
linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json \
linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json

# Refresh every existing *.simple expectation under test_data: strip the
# test_data prefix and the ".simple" extension to get the sample's path
# relative to the current directory, and queue an analysis only when that
# sample file exists.
# find output is read line by line, so a path is never word-split or
# glob-expanded by the shell.
while IFS= read -r f; do
  # Parameter expansion replaces "echo | sed": the prefix and suffix are
  # matched literally (no regex metacharacters) and no processes are forked.
  prog="${f#"${test_data}/"}"
  prog="${prog%.simple}"
  if [[ -f "${prog}" ]]; then
    addq "${bincapz}" --format=simple -o "${f}" analyze "${prog}"
  fi
done < <(find "${test_data}" -name "*.simple")
wait
for f in $(find * -name "*.md"); do
prog=$(echo ${f} | sed s/\.md$//g)
if [[ -f "${prog}" ]]; then
${bincapz} --format=markdown -o "../test_data/${f}" scan "${prog}" &
fi
# Refresh every existing *.md expectation under test_data: strip the
# test_data prefix and the ".md" extension to get the sample's path relative
# to the current directory, and queue an analysis only when that sample file
# exists.
# find output is read line by line, so a path is never word-split or
# glob-expanded by the shell.
while IFS= read -r f; do
  # Parameter expansion replaces "echo | sed": the prefix and suffix are
  # matched literally (no regex metacharacters) and no processes are forked.
  prog="${f#"${test_data}/"}"
  prog="${prog%.md}"
  if [[ -f "${prog}" ]]; then
    addq "${bincapz}" --format=markdown -o "${f}" analyze "${prog}"
  fi
done < <(find "${test_data}" -name "*.md")
wait
for f in $(find * -name "*.json"); do
prog=$(echo ${f} | sed s/\.json$//g)
if [[ -f "${prog}" ]]; then
${bincapz} --format=json -o "../test_data/${f}" scan "${prog}" &
fi
# Refresh every existing *.json expectation under test_data: strip the
# test_data prefix and the ".json" extension to get the sample's path relative
# to the current directory, and queue an analysis only when that sample file
# exists.
# find output is read line by line, so a path is never word-split or
# glob-expanded by the shell.
while IFS= read -r f; do
  # Parameter expansion replaces "echo | sed": the prefix and suffix are
  # matched literally (no regex metacharacters) and no processes are forked.
  prog="${f#"${test_data}/"}"
  prog="${prog%.json}"
  if [[ -f "${prog}" ]]; then
    addq "${bincapz}" --format=json -o "${f}" analyze "${prog}"
  fi
done < <(find "${test_data}" -name "*.json")
wait
# Run the queued commands, at most MAX_PROCS at a time.
# Feeding the file on stdin makes wc print only the line count, not the
# temporary file's path as well.
echo "processing queue with length: $(wc -l <"${qscript}")"
# Each queue line becomes one NUL-terminated record, so xargs hands it over
# whole as the command string of "sh -c". With -n1 the record is appended as
# the last argument, which works on both GNU and BSD xargs; the BSD-only -J
# flag is not needed and is rejected by GNU xargs.
tr '\n' '\0' <"${qscript}" | xargs -0 -n1 -P"${MAX_PROCS}" sh -c
# Remove the temporary queue file once it has been processed.
rm -f "${qscript}"
echo "test data regeneration complete!!"

0 comments on commit 5bbba0e

Please sign in to comment.