Don't invalidate relation cache if cagg watermark constify is false #1808

Workflow file for this run

.github/workflows/libfuzzer.yaml at 02b76fa

	name: Libfuzzer
	"on":
	schedule:
	# run daily 1:00 on main branch
	- cron: '0 1 * * *'
	push:
	branches:
	- main
	- prerelease_test
	- trigger/libfuzzer
	pull_request:
	paths:
	- .github/workflows/libfuzzer.yaml
	- 'tsl/test/fuzzing/compression/**'

	jobs:
	build:
	runs-on: ubuntu-22.04
	name: Build PostgreSQL and TimescaleDB
	env:
	PG_SRC_DIR: pgbuild
	PG_INSTALL_DIR: postgresql

	steps:
	- name: Install Linux Dependencies
	run: \|
	# Don't add ddebs here because the ddebs mirror is always 503 Service Unavailable.
	# If needed, install them before opening the core dump.
	sudo apt-get update
	sudo apt-get install 7zip clang lld llvm flex bison libipc-run-perl \
	libtest-most-perl tree jq

	- name: Checkout TimescaleDB
	uses: actions/checkout@v4

	- name: Read configuration
	id: config
	run: python -B .github/gh_config_reader.py

	# We are going to rebuild Postgres daily, so that it doesn't suddenly break
	# ages after the original problem.
	- name: Get date for build caching
	id: get-date
	run: \|
	echo "date=$(date +"%d")" >> $GITHUB_OUTPUT

	# we cache the build directory instead of the install directory here
	# because extension installation will write files to install directory
	# leading to a tainted cache
	- name: Cache PostgreSQL
	id: cache-postgresql
	uses: actions/cache@v4
	with:
	path: ~/${{ env.PG_SRC_DIR }}
	key: "postgresql-libfuzzer-${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}"

	- name: Build PostgreSQL
	if: steps.cache-postgresql.outputs.cache-hit != 'true'
	run: \|
	wget -q -O postgresql.tar.bz2 \
	https://ftp.postgresql.org/pub/source/v${{ steps.config.outputs.PG15_LATEST }}/postgresql-${{ steps.config.outputs.PG15_LATEST }}.tar.bz2
	mkdir -p ~/$PG_SRC_DIR
	tar --extract --file postgresql.tar.bz2 --directory ~/$PG_SRC_DIR --strip-components 1
	cd ~/$PG_SRC_DIR
	CC=clang ./configure --prefix=$HOME/$PG_INSTALL_DIR --with-openssl \
	--without-readline --without-zlib --without-libxml --enable-cassert \
	--enable-debug CC=clang \
	CFLAGS="-fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer"
	make -j$(nproc)

	- name: Install PostgreSQL
	run: \|
	make -C ~/$PG_SRC_DIR install
	make -C ~/$PG_SRC_DIR/contrib/postgres_fdw install

	- name: Upload config.log
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: config.log for PostgreSQL
	path: ~/${{ env.PG_SRC_DIR }}/config.log

	- name: Build TimescaleDB
	run: \|
	set -e

	export LIBFUZZER_PATH=$(dirname "$(find $(llvm-config --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a \| head -1)")

	# Some pointers for the next time we have linking/undefined symbol problems:
	# http://web.archive.org/web/20200926071757/https://github.com/google/sanitizers/issues/111
	# http://web.archive.org/web/20231101091231/https://github.com/cms-sw/cmssw/issues/40680

	cmake -B build -S . -DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \
	-DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang \
	-DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \
	-DCOMPRESSION_FUZZING=1 -DPG_PATH=$HOME/$PG_INSTALL_DIR

	make -C build -j$(nproc) install

	# Incredibly, the upload-artifact action can't preserve executable permissions:
	# https://github.com/actions/upload-artifact/issues/38
	# It's also extremely slow.
	- name: Compress the installation directory
	run: 7z a install.7z $HOME/$PG_INSTALL_DIR

	- name: Save the installation directory
	uses: actions/upload-artifact@v4
	with:
	name: fuzzing-install-dir
	path: install.7z
	if-no-files-found: error
	retention-days: 1

	fuzz:
	needs: build
	strategy:
	fail-fast: false
	matrix:
	case: [
	{ algo: gorilla , pgtype: float8, bulk: false, runs: 500000000 },
	{ algo: deltadelta, pgtype: int8 , bulk: false, runs: 500000000 },
	{ algo: gorilla , pgtype: float8, bulk: true , runs: 1000000000 },
	{ algo: deltadelta, pgtype: int8 , bulk: true , runs: 1000000000 },
	# array has a peculiar recv function that recompresses all input, so
	# fuzzing it is much slower. The dictionary recv also uses it.
	{ algo: array , pgtype: text , bulk: false, runs: 10000000 },
	{ algo: array , pgtype: text , bulk: true , runs: 10000000 },
	{ algo: dictionary, pgtype: text , bulk: false, runs: 100000000 },
	{ algo: dictionary, pgtype: text , bulk: true , runs: 100000000 },
	]

	name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' \|\| 'rowbyrow' }}
	runs-on: ubuntu-22.04
	env:
	PG_SRC_DIR: pgbuild
	PG_INSTALL_DIR: postgresql
	JOB_NAME: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' \|\| 'rowbyrow' }}

	steps:
	- name: Install Linux dependencies
	run: \|
	sudo apt update
	sudo apt install 7zip systemd-coredump gdb

	- name: Checkout TimescaleDB
	uses: actions/checkout@v4

	- name: Download the installation directory
	uses: actions/download-artifact@v4
	with:
	name: fuzzing-install-dir

	- name: Unpack the installation directory
	run: 7z x -o$HOME install.7z

	- name: initdb
	run: \|
	# Have to do this before initializing the corpus, or initdb will complain.
	set -xeu

	export PGDATA=db
	export PGPORT=5432
	export PGDATABASE=postgres
	export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH
	initdb
	echo "shared_preload_libraries = 'timescaledb'" >> $PGDATA/postgresql.conf

	- name: Set configuration
	id: config
	run: \|
	set -x
	echo "cache_prefix=${{ format('libfuzzer-corpus-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT
	echo "name=${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' \|\| 'rowbyrow' }}" >> $GITHUB_OUTPUT

	- name: Restore the cached fuzzing corpus (bulk)
	id: restore-corpus-cache-bulk
	uses: actions/cache/restore@v4
	with:
	path: db/corpus-bulk
	key: "${{ steps.config.outputs.cache_prefix }}-bulk"

	# We save the row-by-row corpus separately from the bulk corpus, so that
	# they don't overwrite each other. Now we are going to combine them.
	- name: Restore the cached fuzzing corpus (rowbyrow)
	id: restore-corpus-cache-rowbyrow
	uses: actions/cache/restore@v4
	with:
	path: db/corpus-rowbyrow
	key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow"

	- name: Initialize the fuzzing corpus
	run: \|
	# Combine the cached corpus from rowbyrow and bulk fuzzing, and from repository.
	mkdir -p db/corpus{,-rowbyrow,-bulk}
	find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -n -t db/corpus {} +
	find "db/corpus-rowbyrow" -type f -exec cp -n -t db/corpus {} +
	find "db/corpus-bulk" -type f -exec cp -n -t db/corpus {} +
	ls db/corpus \| wc -l

	- name: Run libfuzzer for compression
	run: \|
	set -xeu

	export PGDATA=db
	export PGPORT=5432
	export PGDATABASE=postgres
	export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH
	pg_ctl -o "-clogging_collector=true" -o "-clog_destination=jsonlog,stderr" \
	-o "-clog_directory=$(readlink -f .)" -o "-clog_filename=postmaster.log" \
	-o "-clog_error_verbosity=verbose" start

	psql -c "create extension timescaledb;"

	# Create the fuzzing functions
	export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so"))
	psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype,
	bulk bool, runs int)
	returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;

	create or replace function ts_read_compressed_data_directory(algo cstring,
	pgtype regtype, path cstring, bulk bool)
	returns table(path text, bytes int, rows int, sqlstate text, location text)
	as '"$MODULE_NAME"', 'ts_read_compressed_data_directory' language c;

	"

	# Start more fuzzing processes in the background. We won't even monitor
	# their progress, because the server will panic if they find an error.
	for x in {2..$(nproc)}
	do
	psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}',
	'${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" &
	done

	# Start the one fuzzing process that we will monitor, in foreground.
	# The LLVM fuzzing driver calls exit(), so we expect to lose the connection.
	ret=0
	psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}',
	'${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" \|\| ret=$?
	if ! [ $ret -eq 2 ]
	then
	>&2 echo "Unexpected psql exit code $ret"
	exit 1
	fi

	ls db/corpus \| wc -l

	fn="ts_read_compressed_data_directory('${{ matrix.case.algo }}',
	'${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}')"

	# Show the statistics about fuzzing corpus
	psql -c "select count(*), location, min(sqlstate), min(path)
	from $fn
	group by location order by count(*) desc
	"

	# Save interesting cases because the caches are not available for download from UI
	mkdir -p interesting
	psql -qtAX -c "select distinct on (location) 'db/' \|\| path from $fn
	order by location, bytes, path
	" \| xargs cp -t interesting

	# Check that we don't have any internal errors
	errors=$(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*)
	from $fn
	where sqlstate = 'XX000'")
	echo "Internal program errors: $errors"
	[ $errors -eq 0 ] \|\| exit 1

	# Shouldn't have any WARNINGS in the log.
	! grep -F "] WARNING: " postmaster.log

	# Check that the server is still alive.
	psql -c "select 1"

	- name: Collect the logs
	if: always()
	id: collectlogs
	run: \|
	# wait in case there are in-progress coredumps
	sleep 10
	if coredumpctl -q list >/dev/null; then echo "coredumps=true" >>$GITHUB_OUTPUT; fi
	# print OOM killer information
	sudo journalctl --system -q --facility=kern --grep "Killed process" \|\| true

	- name: Save PostgreSQL log
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: PostgreSQL log for ${{ steps.config.outputs.name }}
	path: postgres.*

	- name: Save fuzzer-generated crash cases
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: Crash cases for ${{ steps.config.outputs.name }}
	path: db/crash-*
	if-no-files-found: ignore

	- name: Save interesting cases
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: Interesting cases for ${{ steps.config.outputs.name }}
	path: interesting/

	# We use separate restore/save actions, because the default action won't
	# save the updated folder after the cache hit. We also want to save the
	# cache after fuzzing errors, and the default action doesn't save after
	# errors.
	# We can't overwrite the existing cache, so we add a unique suffix. The
	# cache is matched by key prefix, not exact key, and picks the newest
	# matching item, so this works.
	# The caches for rowbyrow and bulk fuzzing are saved separately, otherwise
	# the slower job would always overwrite the cache from the faster one. We
	# want to combine corpuses from bulk and rowbyrow fuzzing for better
	# coverage.
	# Note that the cache action cannot be restored on a path different from the
	# one it was saved from. To make our lives more interesting, it is not
	# directly documented anywhere, but we can deduce it from path influencing
	# the version.
	- name: Change corpus path to please the 'actions/cache' GitHub Action
	if: always()
	run: \|
	rm -rf db/corpus-{bulk,rowbyrow} \|\|:
	mv -fT db/corpus{,-${{ matrix.case.bulk && 'bulk' \|\| 'rowbyrow' }}}

	- name: Save fuzzer corpus
	if: always()
	uses: actions/cache/save@v4
	with:
	path: db/corpus-${{ matrix.case.bulk && 'bulk' \|\| 'rowbyrow' }}
	key: "${{ format('{0}-{1}-{2}-{3}',
	steps.config.outputs.cache_prefix,
	matrix.case.bulk && 'bulk' \|\| 'rowbyrow',
	github.run_id, github.run_attempt) }}"

	- name: Stack trace
	if: always() && steps.collectlogs.outputs.coredumps == 'true'
	run: \|
	sudo coredumpctl gdb <<<"
	set verbose on
	set trace-commands on
	show debug-file-directory
	printf "'"'"query = '%s'\n\n"'"'", debug_query_string
	frame function ExceptionalCondition
	printf "'"'"condition = '%s'\n"'"'", conditionName
	up 1
	l
	info args
	info locals
	bt full
	" 2>&1 \| tee stacktrace.log
	./scripts/bundle_coredumps.sh
	exit 1 # Fail the job if we have core dumps.

	- name: Upload core dumps
	if: always() && steps.collectlogs.outputs.coredumps == 'true'
	uses: actions/upload-artifact@v4
	with:
	name: Coredumps for ${{ steps.config.outputs.name }}
	path: coredumps

	- name: Upload test results to the database
	if: always()
	env:
	CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
	GITHUB_EVENT_NAME: ${{ github.event_name }}
	GITHUB_REF_NAME: ${{ github.ref_name }}
	GITHUB_REPOSITORY: ${{ github.repository }}
	GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
	GITHUB_RUN_ID: ${{ github.run_id }}
	GITHUB_RUN_NUMBER: ${{ github.run_number }}
	JOB_STATUS: ${{ job.status }}
	run: \|
	if [[ "${{ github.event_name }}" == "pull_request" ]] ;
	then
	GITHUB_PR_NUMBER="${{ github.event.number }}"
	else
	GITHUB_PR_NUMBER=0
	fi
	export GITHUB_PR_NUMBER
	scripts/upload_ci_stats.sh

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Don't invalidate relation cache if cagg watermark constify is false #1808

Workflow file

Don't invalidate relation cache if cagg watermark constify is false #1808

Jobs

Run details

Workflow file for this run