diff --git a/Makefile b/Makefile
index 67bd9b0f4..ccbb1cac3 100644
--- a/Makefile
+++ b/Makefile
@@ -47,17 +47,23 @@ fix: $(FIXERS)
 
 # END: lint-install ../bincapz
 
-SAMPLES_REPO=https://github.com/chainguard-dev/bincapz-samples.git
-SAMPLES_HASH=bdcb8c2e9bf557a0abe3e2b0144f437d456299b7
-OUT_DIR=out/samples-$(SAMPLES_HASH).tmp
-out/samples-$(SAMPLES_HASH):
+# The samples corpus is pinned to one commit of a GitHub "owner/name" repository.
+# Both values use ?= so the environment or command line can override them.
+SAMPLES_REPO ?= chainguard-dev/bincapz-samples
+SAMPLES_COMMIT ?= bdcb8c2e9bf557a0abe3e2b0144f437d456299b7
+OUT_DIR=out/samples-$(SAMPLES_COMMIT).tmp
+# Clone and extract into OUT_DIR, renaming it into place only as the last step.
+# A stale OUT_DIR from an interrupted run is removed first, because git clone
+# refuses to clone into an existing non-empty directory.
+out/samples-$(SAMPLES_COMMIT):
 	mkdir -p out
-	git clone $(SAMPLES_REPO) $(OUT_DIR)
-	git -C $(OUT_DIR) checkout $(SAMPLES_HASH)
+	rm -rf $(OUT_DIR)
+	git clone https://github.com/$(SAMPLES_REPO).git $(OUT_DIR)
+	git -C $(OUT_DIR) checkout $(SAMPLES_COMMIT)
 	find $(OUT_DIR) -name "*.xz" -execdir tar xJvf "{}" \;
 	mv $(OUT_DIR) $(basename $(OUT_DIR))
 
-prepare-samples: out/samples-$(SAMPLES_HASH)
+prepare-samples: out/samples-$(SAMPLES_COMMIT)
 	cp -a test_data/. $(basename $(OUT_DIR))
 
 .PHONY: test
@@ -124,9 +130,8 @@ update-third-party:
 	./third_party/yara/update.sh
 
 .PHONY: refresh-sample-testdata out/bincapz
-refresh-sample-testdata: clone-samples out/bincapz
-	cp ./test_data/refresh-testdata.sh samples/
-	./out/samples/refresh-testdata.sh ./out/bincapz
+refresh-sample-testdata: out/samples-$(SAMPLES_COMMIT) out/bincapz
+	./test_data/refresh-testdata.sh ./out/bincapz out/samples-$(SAMPLES_COMMIT)
 
 ARCH ?= $(shell uname -m)
 CRANE_VERSION=v0.20.2
diff --git a/test_data/refresh-testdata.sh b/test_data/refresh-testdata.sh
index dabbb7282..7204f597d 100755
--- a/test_data/refresh-testdata.sh
+++ b/test_data/refresh-testdata.sh
@@ -2,110 +2,147 @@
 # refresh testdata with latest bincapz
 #
 # usage:
-#   ./refresh-testdata.sh
+#   ./refresh-testdata.sh BINCAPZ SAMPLES_DIR
+#
+# BINCAPZ is the bincapz binary to run; SAMPLES_DIR is a checkout of the
+# samples repository with its archives already extracted. Set MAX_PROCS to
+# change how many queued commands run in parallel (default: 8).
 #
 # NOTE: This is slow to run, so for small changes you are better
 # off manually updating a single test file.
 
-set -ux -o pipefail
+set -eu -o pipefail
+MAX_PROCS=${MAX_PROCS:-8}
 
-readonly bincapz=$(realpath $1)
-readonly root_dir=$(dirname $0)
-cd "${root_dir}"
+if [[ $# -lt 2 ]]; then
+	echo "usage: $0 BINCAPZ SAMPLES_DIR" >&2
+	exit 1
+fi
+
+# Resolve both paths before changing directory, since they may be relative.
+# A failed realpath leaves the variable empty; the checks below report that.
+readonly bincapz=$(realpath "$1")
+readonly samples=$(realpath "$2")
+
+cd "$(dirname "$0")"
+cd ..
+readonly root_dir=$(pwd)
+readonly test_data="${root_dir}/test_data"
 
 if [[ -z "${bincapz}" ]]; then
-    echo "must pass location of bincapz"
-    exit 1
+	echo "must pass location of bincapz"
+	exit 1
 fi
 
 if [[ ! -x "${bincapz}" ]]; then
-    echo "bincapz at ${bincapz} is not executable"
-    exit 1
+	echo "bincapz at ${bincapz} is not executable"
+	exit 1
 fi
 
-# OCI edge case
-${bincapz} --format=simple \
-    --min-risk any \
-    --min-file-risk any \
-    -o ../pkg/action/testdata/scan_oci \
-    ../pkg/action/testdata/static.tar.xz; sed -i.bak 's|\.\.\/pkg\/action\/||g' ../pkg/action/testdata/scan_oci && rm ../pkg/action/testdata/scan_oci.bak &
-
-# diffs don't follow an easy rule
-${bincapz} --format=markdown \
-    -o ../test_data/macOS/2023.3CX/libffmpeg.dirty.mdiff \
-    diff \
-    macOS/2023.3CX/libffmpeg.dylib \
-    macOS/2023.3CX/libffmpeg.dirty.dylib &
-
-${bincapz} --format=markdown \
-    -o ../test_data/macOS/clean/ls.mdiff \
-    diff \
-    linux/clean/ls.x86_64 \
-    macOS/clean/ls &
-
-${bincapz} --format=simple \
-    --min-level 2 \
-    --min-file-level 2 \
-    -o ../test_data/macOS/clean/ls.sdiff.level_2 \
-    diff \
-    linux/clean/ls.x86_64 \
-    macOS/clean/ls &
-
-${bincapz} --format=simple \
-    --min-level 1 \
-    --min-file-level 2 \
-    -o ../test_data/macOS/clean/ls.sdiff.trigger_2 \
-    diff \
-    linux/clean/ls.x86_64 \
-    macOS/clean/ls &
-
-${bincapz} --format=simple \
-    --min-level 1 \
-    --min-file-level 3 \
-    -o ../test_data/macOS/clean/ls.sdiff.trigger_3 \
-    diff \
-    linux/clean/ls.x86_64 \
-    macOS/clean/ls &
-
-${bincapz} --format=simple \
-    -o ../test_data/linux/2024.sbcl.market/sbcl.sdiff \
-    diff \
-    linux/2024.sbcl.market/sbcl.clean \
-    linux/2024.sbcl.market/sbcl.dirty &
-
-${bincapz} --format=simple \
-    -o ../test_data/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff \
-    diff \
-    linux/2023.FreeDownloadManager/freedownloadmanager_clear_postinst \
-    linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst &
-
-${bincapz} --format=simple \
-    -o ../test_data/linux/clean/aws-c-io/aws-c-io.sdiff \
-    diff \
-    linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json \
-    linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json &
-wait
-
-for f in $(find * -name "*.simple"); do
-    prog=$(echo ${f} | sed s/\.simple$//g)
-    if [[ -f "${prog}" ]]; then
-        ${bincapz} --format=simple -o "../test_data/${f}" scan "${prog}" &
-    fi
-done
-wait
-
-for f in $(find * -name "*.md"); do
-    prog=$(echo ${f} | sed s/\.md$//g)
-    if [[ -f "${prog}" ]]; then
-        ${bincapz} --format=markdown -o "../test_data/${f}" scan "${prog}" &
-    fi
-done
-wait
-
-for f in $(find * -name "*.json"); do
-    prog=$(echo ${f} | sed s/\.json$//g)
-    if [[ -f "${prog}" ]]; then
-        ${bincapz} --format=json -o "../test_data/${f}" scan "${prog}" &
-    fi
-done
-wait
+if [[ ! -d "${samples}" ]]; then
+	echo "samples at $2 is not a directory"
+	exit 1
+fi
+
+# Commands are collected in a queue file, one per line, and executed in
+# parallel at the end. The file is removed when the script exits.
+readonly qscript=$(mktemp)
+trap 'rm -f "${qscript}"' EXIT
+
+# addq appends one command line to the queue. Each argument is quoted with
+# printf %q so that paths containing spaces or shell metacharacters are
+# passed through intact when bash re-parses the line.
+function addq() {
+	printf '%q ' "$@" >>"${qscript}"
+	printf '\n' >>"${qscript}"
+}
+
+# OCI edge case
+cd "${root_dir}/pkg/action"
+echo "regenerating test data, max_procs=${MAX_PROCS} ..."
+"${bincapz}" --format=simple \
+	--min-risk any \
+	--min-file-risk any \
+	-o testdata/scan_oci \
+	analyze testdata/static.tar.xz
+
+cd "${samples}"
+
+# diffs don't follow an easy rule
+addq "${bincapz}" --format=markdown \
+	-o "${test_data}/macOS/2023.3CX/libffmpeg.dirty.mdiff" \
+	diff \
+	macOS/2023.3CX/libffmpeg.dylib \
+	macOS/2023.3CX/libffmpeg.dirty.dylib
+
+addq "${bincapz}" --format=markdown \
+	-o "${test_data}/macOS/clean/ls.mdiff" \
+	diff \
+	linux/clean/ls.x86_64 \
+	macOS/clean/ls
+
+addq "${bincapz}" --format=simple \
+	--min-level 2 \
+	--min-file-level 2 \
+	-o "${test_data}/macOS/clean/ls.sdiff.level_2" \
+	diff \
+	linux/clean/ls.x86_64 \
+	macOS/clean/ls
+
+addq "${bincapz}" --format=simple \
+	--min-level 1 \
+	--min-file-level 2 \
+	-o "${test_data}/macOS/clean/ls.sdiff.trigger_2" \
+	diff \
+	linux/clean/ls.x86_64 \
+	macOS/clean/ls
+
+addq "${bincapz}" --format=simple \
+	--min-level 1 \
+	--min-file-level 3 \
+	-o "${test_data}/macOS/clean/ls.sdiff.trigger_3" \
+	diff \
+	linux/clean/ls.x86_64 \
+	macOS/clean/ls
+
+addq "${bincapz}" --format=simple \
+	-o "${test_data}/linux/2024.sbcl.market/sbcl.sdiff" \
+	diff \
+	linux/2024.sbcl.market/sbcl.clean \
+	linux/2024.sbcl.market/sbcl.dirty
+
+addq "${bincapz}" --format=simple \
+	-o "${test_data}/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff" \
+	diff \
+	linux/2023.FreeDownloadManager/freedownloadmanager_clear_postinst \
+	linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst
+
+addq "${bincapz}" --format=simple \
+	-o "${test_data}/linux/clean/aws-c-io/aws-c-io.sdiff" \
+	diff \
+	linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json \
+	linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json
+
+# queue_analyze EXT FORMAT: for every file ${test_data}/PATH.EXT whose PATH
+# exists as a regular file relative to the current directory (the samples
+# checkout), queue an "analyze" run that rewrites that file in FORMAT.
+function queue_analyze() {
+	local ext="$1" format="$2" f prog
+	while IFS= read -r -d '' f; do
+		prog="${f#"${test_data}/"}"
+		prog="${prog%".${ext}"}"
+		if [[ -f "${prog}" ]]; then
+			addq "${bincapz}" "--format=${format}" -o "${f}" analyze "${prog}"
+		fi
+	done < <(find "${test_data}" -name "*.${ext}" -print0)
+}
+
+queue_analyze simple simple
+queue_analyze md markdown
+queue_analyze json json
+
+echo "processing queue with length: $(wc -l <"${qscript}")"
+# Each NUL-delimited queue line becomes the single argument of "bash -c".
+# This avoids the BSD-only -J option, so it runs with GNU and BSD xargs alike.
+tr '\n' '\0' <"${qscript}" | xargs -0 -n1 -P"${MAX_PROCS}" bash -c
+echo "test data regeneration complete!!"