Skip to content

Faster FSST decompression (#1769) #1226

Faster FSST decompression (#1769)

Faster FSST decompression (#1769) #1226

Workflow file for this run

name: Benchmarks
on:
push:
branches: [ develop ]
permissions:
id-token: write # enables AWS-GitHub OIDC
actions: read
contents: write
deployments: write
jobs:
commit-metadata:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Upload Commit Metadata
shell: bash
run: |
set -Eeu -o pipefail -x
sudo apt-get update && sudo apt-get install -y jq
bash scripts/commit-json.sh > new-commit.json
bash scripts/cat-s3.sh vortex-benchmark-results-database commits.json new-commit.json
bench:
strategy:
matrix:
benchmark:
- id: datafusion
name: DataFusion
- id: random_access
name: Random Access
- id: compress
name: Vortex Compression
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/setup-duckdb-action@v1.0.10
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
run: |
cargo install cargo-criterion
sudo apt-get update && sudo apt-get install -y jq
cargo criterion \
--bench ${{ matrix.benchmark.id }} \
--message-format=json \
> ${{ matrix.benchmark.id }}-raw.json
cat ${{ matrix.benchmark.id }}-raw.json \
| bash scripts/coerce-criterion-json.sh \
> ${{ matrix.benchmark.id }}.json
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Upload Benchmark Results
shell: bash
run: |
bash scripts/cat-s3.sh vortex-benchmark-results-database data.json ${{ matrix.benchmark.id }}.json
tpch:
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/setup-duckdb-action@v1.0.10
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run TPC-H benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
run: |
cargo run --bin tpch_benchmark --release -- -d gh-json -t 1 | tee tpch.json
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Upload Benchmark Results
shell: bash
run: |
bash scripts/cat-s3.sh vortex-benchmark-results-database data.json tpch.json
clickbench:
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/cleanup
- uses: ./.github/actions/setup-rust
# The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
- name: Install DuckDB
uses: opt-nc/setup-duckdb-action@v1.0.10
if: runner.environment != 'self-hosted'
with:
version: v1.0.0
- name: Set tempdir
if: runner.environment == 'self-hosted'
run: |
echo "TMPDIR=/work" >> $GITHUB_ENV
- name: Run Clickbench benchmark
shell: bash
env:
BENCH_VORTEX_RATIOS: '.*'
RUSTFLAGS: '-C target-cpu=native'
HOME: /home/ci-runner
run: |
cargo run --bin clickbench --release -- -d gh-json | tee clickbench.json
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Upload Benchmark Results
shell: bash
run: |
bash scripts/cat-s3.sh vortex-benchmark-results-database data.json clickbench.json