# Skip to content
#
# Different benchmark types #2579
#
# Different benchmark types
#
# Different benchmark types #2579
#
# Workflow file for this run

# Workflow-level settings for the A/B benchmark comparison runs.
name: "A/B Tests"

# Triggered by pushes to every branch except `main`.
on:
  push:
    branches-ignore: [main]

# One concurrency group per event type and ref: a newer run in the same
# group cancels the run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ab_tests-${{ github.event_name }}-${{ github.ref }}

# Run steps in a login shell; this entrypoint is required to have properly
# activated conda environments.
defaults:
  run:
    shell: bash -l {0}

jobs:
# Produces the JSON matrix description that the downstream jobs read via
# `needs.discover_ab_envs.outputs.matrix`.
discover_ab_envs:
  name: Discover A/B environments
  runs-on: ubuntu-latest
  # Re-export the step output at job level so dependent jobs can access it
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@v4
      name: Checkout

    - uses: actions/setup-python@v5
      name: Install Python
      with:
        # Quoted so the version is not parsed as the float 3.1
        python-version: '3.10'

    # Presumably required by the discovery script below (not visible here)
    - name: Install dependencies
      run: pip install PyYaml

    # The script's stdout becomes the `matrix` output of this step
    - id: set-matrix
      name: Generate dynamic matrix
      run: echo "matrix=$(python ci/scripts/discover_ab_environments.py)" >> $GITHUB_OUTPUT
# Everything below this point runs iff there are files matching
# AB_environments/AB_*.{conda,dask}.yaml
# and AB_environments/config.yaml set repeat > 0
#
# Runs the benchmark suite once per combination of pytest arguments,
# A/B environment, repeat index and h2o dataset from the discovered matrix.
tests:
  name: A/B Tests - ${{ matrix.runtime-version }}
  needs: discover_ab_envs
  if: fromJson(needs.discover_ab_envs.outputs.matrix).run_AB
  runs-on: ubuntu-latest
  timeout-minutes: 120
  strategy:
    # Let the remaining matrix entries finish even if one of them fails
    fail-fast: false
    max-parallel: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).max_parallel }}
    matrix:
      pytest_args: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).pytest_args }}
      runtime-version: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).runtime }}
      repeat: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).repeat }}
      h2o_datasets: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).h2o_datasets }}
  env:
    AB_TEST_NAME: ${{ matrix.runtime-version }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # The null hypothesis entry reuses the baseline's conda, dask and cluster
    # files under its own name so the later steps can find them.
    - name: Create null hypothesis as a copy of baseline
      if: matrix.runtime-version == 'AB_null_hypothesis'
      run: |
        cd AB_environments
        cp AB_baseline.conda.yaml AB_null_hypothesis.conda.yaml
        cp AB_baseline.dask.yaml AB_null_hypothesis.dask.yaml
        cp AB_baseline.cluster.yaml AB_null_hypothesis.cluster.yaml

    - name: Set up environment
      uses: conda-incubator/setup-miniconda@v3
      with:
        # The Mambaforge installer has been retired by conda-forge;
        # Miniforge3 bundles mamba, so `use-mamba` keeps working.
        miniforge-variant: Miniforge3
        use-mamba: true
        condarc-file: ci/condarc
        environment-file: AB_environments/${{ matrix.runtime-version }}.conda.yaml

    - name: Add test dependencies
      run: mamba env update --file ci/environment-test.yml

    - name: Dump environment
      run: |
        # For debugging
        echo -e "--\n--Conda Environment (re-create this with \`conda env create --name <name> -f <output_file>\`)\n--"
        # Print the export and also save it: the upload step below lists
        # mamba_env_export.yml, which nothing else in this job creates.
        mamba env export | grep -E -v '^prefix:.*$' | tee mamba_env_export.yml

    # Appending to $GITHUB_ENV makes the variables visible to all later steps
    - name: Convert dask config into environment variables
      run: python ci/scripts/dask_config_to_env.py AB_environments/${{ matrix.runtime-version }}.dask.yaml >> $GITHUB_ENV

    - name: Run Coiled Runtime Tests
      env:
        DASK_COILED__TOKEN: ${{ secrets.COILED_BENCHMARK_BOT_TOKEN }}
        AWS_ACCESS_KEY_ID: ${{ secrets.RUNTIME_CI_BOT_AWS_ACCESS_KEY_ID }}
        AWS_DEFAULT_REGION: us-east-2  # this is needed for boto for some reason
        AWS_SECRET_ACCESS_KEY: ${{ secrets.RUNTIME_CI_BOT_AWS_SECRET_ACCESS_KEY }}
        PYTHON_STUB_PAT: ${{ secrets.PYTHON_STUB_PAT }}
        SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
        SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
        SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
        SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
        SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
        AB_VERSION: ${{ matrix.runtime-version }}
        # Same file name as the one listed in the upload step below
        DB_NAME: ${{ matrix.runtime-version }}-${{ matrix.repeat }}.db
        CLUSTER_KWARGS: AB_environments/${{ matrix.runtime-version }}.cluster.yaml
        H2O_DATASETS: ${{ matrix.h2o_datasets }}
        MEMRAY_PROFILE: "scheduler"
      run: pytest --benchmark --memray $MEMRAY_PROFILE ${{ matrix.pytest_args }}

    # NOTE(review): this file is presumably written by the test run - confirm
    - name: Dump coiled.Cluster kwargs
      run: cat cluster_kwargs.merged.yaml

    # `always()` so results are uploaded even when an earlier step failed
    - name: Upload benchmark results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: ${{ matrix.runtime-version }}-${{ matrix.repeat }}
        path: |
          ${{ matrix.runtime-version }}-${{ matrix.repeat }}.db
          cluster_kwargs.*.*
          mamba_env_export.yml
# Downloads the artifacts of the workflow run and merges the per-run
# benchmark databases into a single `benchmark.db` artifact.
process-results:
  needs: [discover_ab_envs, tests]
  name: Combine separate benchmark results
  # `always()` keeps this job running when some test runs failed. The result
  # check comes first so that `fromJson` is not handed an empty output when
  # the discovery job itself did not succeed.
  if: >-
    always()
    && needs.discover_ab_envs.result == 'success'
    && fromJson(needs.discover_ab_envs.outputs.matrix).run_AB
  runs-on: ubuntu-latest
  concurrency:
    # Fairly strict concurrency rule to avoid stepping on benchmark db.
    # Could eventually replace with a real db in coiled, RDS, or litestream
    group: process-benchmarks
    cancel-in-progress: false
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    # Presumably used by the combine script below - TODO confirm
    - name: Install dependencies
      run: pip install alembic

    # No artifact name given: everything is fetched into `benchmarks/`
    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: benchmarks

    - name: Combine benchmarks
      run: |
        ls -lhR benchmarks
        bash ci/scripts/combine-dbs.sh

    - name: Upload benchmark results as artifact
      uses: actions/upload-artifact@v4
      with:
        name: benchmark
        path: benchmark.db
# Renders the combined benchmark database into static dashboards and
# publishes them as the `static-dashboard` artifact.
static-site:
  needs: [discover_ab_envs, process-results]
  # Always generate the site, as this can be skipped even if an indirect dependency fails (like a test run)
  # The result check comes first so that `fromJson` is not handed an empty
  # output when the discovery job itself did not succeed.
  if: >-
    always()
    && needs.discover_ab_envs.result == 'success'
    && fromJson(needs.discover_ab_envs.outputs.matrix).run_AB
  name: Build static dashboards
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # Fetches the `benchmark` artifact, which holds the benchmark.db read below
    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        name: benchmark

    - name: Set up environment
      uses: conda-incubator/setup-miniconda@v3
      with:
        # The Mambaforge installer has been retired by conda-forge;
        # Miniforge3 bundles mamba, so `use-mamba` keeps working.
        miniforge-variant: Miniforge3
        use-mamba: true
        environment-file: ci/environment-dashboard.yml

    - name: Generate dashboards
      run: python dashboard.py -d benchmark.db -o static -b all

    - name: Upload artifact
      uses: actions/upload-artifact@v4
      with:
        name: static-dashboard
        path: static