Skip to content

Commit

Permalink
Merge pull request #3602 from dbt-labs/performance-regression-testing
Browse files Browse the repository at this point in the history
Add Performance Regression Testing [Rust]
  • Loading branch information
Nathaniel May committed Aug 11, 2021
2 parents 4541682 + 1fe5375 commit 1a98460
Show file tree
Hide file tree
Showing 27 changed files with 1,326 additions and 0 deletions.
181 changes: 181 additions & 0 deletions .github/workflows/performance.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@

name: Performance Regression Testing

# Workflow triggers: pull requests (temporary), a schedule, and manual runs.
on:
  # TODO this is just while developing
  pull_request:
    branches:
      - 'develop'
      - 'performance-regression-testing'
  schedule:
    # runs twice a day at 10:05 and 22:05 UTC (GitHub evaluates cron in UTC)
    - cron: '5 10,22 * * *'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:

# Checks the formatting of the runner code with `cargo fmt -- --check`.
# Purposefully not a dependency of any other job: a failure here
# will block merging, but not prevent developing.
fmt:
  name: Cargo fmt
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - uses: actions-rs/toolchain@v1
      with:
        profile: minimal
        toolchain: stable
        override: true
        # rustfmt has to be requested explicitly alongside the minimal
        # profile; this replaces a separate `rustup component add` step.
        components: rustfmt
    - uses: actions-rs/cargo@v1
      with:
        command: fmt
        args: --manifest-path performance/runner/Cargo.toml --all -- --check

# Runs any tests associated with the runner (`cargo test`).
# These tests make sure the runner logic is correct.
test-runner:
  name: Test Runner
  runs-on: ubuntu-latest
  env:
    # turns warnings into errors, so any compiler warning fails the job
    RUSTFLAGS: "-D warnings"
  steps:
    - uses: actions/checkout@v2
    - uses: actions-rs/toolchain@v1
      with:
        profile: minimal
        toolchain: stable
        override: true
    - uses: actions-rs/cargo@v1
      with:
        command: test
        args: --manifest-path performance/runner/Cargo.toml

# Compiles the runner in release mode and publishes the binary as the
# `runner` artifact, which the later jobs download and execute.
build-runner:
  name: Build Runner
  runs-on: ubuntu-latest
  # only build once the runner's own tests have passed
  needs:
    - test-runner
  env:
    RUSTFLAGS: '-D warnings'
  steps:
    - uses: actions/checkout@v2
    - uses: actions-rs/toolchain@v1
      with:
        toolchain: stable
        profile: minimal
        override: true
    - uses: actions-rs/cargo@v1
      with:
        command: build
        args: --release --manifest-path performance/runner/Cargo.toml
    - uses: actions/upload-artifact@v2
      with:
        name: runner
        path: performance/runner/target/release/runner

# Measures dbt as installed from the checked-out ref (the current branch,
# or the default branch) and uploads the output as `dev-results`.
measure-dev:
  name: Measure Dev Branch
  runs-on: ubuntu-latest
  needs:
    - build-runner
  steps:
    - name: checkout dev
      uses: actions/checkout@v2
    - name: Setup Python
      uses: actions/setup-python@v2.2.2
      with:
        python-version: "3.8"
    - name: install dbt
      run: pip install -r dev-requirements.txt -r editable-requirements.txt
    - name: install hyperfine
      run: >-
        wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb
        && sudo dpkg -i hyperfine_1.11.0_amd64.deb
    # fetch the binary produced by the build-runner job
    - uses: actions/download-artifact@v2
      with:
        name: runner
    # the downloaded binary must be marked executable before it can run
    - name: change permissions
      run: chmod +x ./runner
    - name: run
      run: ./runner measure -b dev -p ${{ github.workspace }}/performance/projects/
    - uses: actions/upload-artifact@v2
      with:
        name: dev-results
        path: performance/results/

# Measures the release branch that serves as the performance baseline.
# This is by far the slowest part, so everything that can be done up
# front is done first so that the job fails fast.
# -----
# dbt is checked out twice in this job: first the baseline dbt version,
# then the develop or current branch, which supplies the latest
# regression testing projects, metrics, and runner code so that the
# calculations match for both versions of dbt being compared.
measure-baseline:
  name: Measure Baseline Branch
  runs-on: ubuntu-latest
  needs:
    - build-runner
  steps:
    - name: checkout latest
      uses: actions/checkout@v2
      with:
        ref: "0.20.latest"
    - name: Setup Python
      uses: actions/setup-python@v2.2.2
      with:
        python-version: "3.8"
    # copy the baseline checkout to a sibling directory of the workspace
    - name: move repo up a level
      run: >-
        mkdir ${{ github.workspace }}/../baseline/
        && cp -r ${{ github.workspace }} ${{ github.workspace }}/../baseline
    - name: '[debug] ls new dbt location'
      run: ls ${{ github.workspace }}/../baseline/dbt/
    # installation creates egg-links, so the copied source has to stay put
    - name: install dbt from new location
      run: >-
        cd ${{ github.workspace }}/../baseline/dbt/
        && pip install -r dev-requirements.txt -r editable-requirements.txt
    # check out the current branch to get all the target projects;
    # this deletes the previously checked-out code, hence the copy above
    - name: checkout dev
      uses: actions/checkout@v2
    - name: install hyperfine
      run: >-
        wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb
        && sudo dpkg -i hyperfine_1.11.0_amd64.deb
    - uses: actions/download-artifact@v2
      with:
        name: runner
    - name: change permissions
      run: chmod +x ./runner
    - name: run runner
      run: ./runner measure -b baseline -p ${{ github.workspace }}/performance/projects/
    - uses: actions/upload-artifact@v2
      with:
        name: baseline-results
        path: performance/results/

# Compares the measurements produced for the two branches.
# The job exits with a non-zero code if a regression is detected.
calculate-regressions:
  name: Compare Results
  runs-on: ubuntu-latest
  needs:
    - measure-dev
    - measure-baseline
  steps:
    # both result sets are downloaded into the working directory
    - uses: actions/download-artifact@v2
      with:
        name: dev-results
    - uses: actions/download-artifact@v2
      with:
        name: baseline-results
    - name: '[debug] ls result files'
      run: ls
    - uses: actions/download-artifact@v2
      with:
        name: runner
    - name: change permissions
      run: chmod +x ./runner
    - name: run calculation
      run: ./runner calculate -r ./
    # always attempt to upload the results, even if regressions were found
    - uses: actions/upload-artifact@v2
      if: always()
      with:
        name: final-calculations
        path: ./final_calculations.json
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
- Fix for RPC requests that raise a RecursionError when serializing Undefined values as JSON ([#3464](https://github.com/dbt-labs/dbt/issues/3464), [#3687](https://github.com/dbt-labs/dbt/pull/3687))

### Under the hood
- Add performance regression testing ([#3602](https://github.com/dbt-labs/dbt/pull/3602))
- Improve default view and table materialization performance by checking relational cache before attempting to drop temp relations ([#3112](https://github.com/fishtown-analytics/dbt/issues/3112), [#3468](https://github.com/fishtown-analytics/dbt/pull/3468))
- Add optional `sslcert`, `sslkey`, and `sslrootcert` profile arguments to the Postgres connector. ([#3472](https://github.com/fishtown-analytics/dbt/pull/3472), [#3473](https://github.com/fishtown-analytics/dbt/pull/3473))
- Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536))
Expand Down
18 changes: 18 additions & 0 deletions performance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Performance Regression Testing
This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run twice a day, but it can also be triggered manually.

The github workflow hardcodes our baseline branch for performance metrics as `0.20.latest`. As future versions become faster, this branch will be updated to hold us to those new standards.

## Adding a new dbt project
Just make a new directory under `performance/projects/`. It will automatically be picked up by the tests.

## Adding a new dbt command
In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The GitHub Action will handle recompilation if you don't have the Rust toolchain installed.

## Future work
- add more projects to test different configurations that have been known bottlenecks
- add more dbt commands to measure
- possibly using the uploaded JSON artifacts to store these results so they can be graphed over time
- reading new metrics from a file so no one has to edit Rust source to add them to the suite
- instead of building the Rust runner every time, we could publish and pull down the latest version.
- instead of manually setting the baseline version of dbt to test, pull down the latest stable version as the baseline.
1 change: 1 addition & 0 deletions performance/project_config/.user.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b
12 changes: 12 additions & 0 deletions performance/project_config/profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Placeholder Postgres connection profile named `default`.
default:
  outputs:
    dev:
      type: postgres
      host: localhost
      port: 5432
      user: dummy
      password: dummy_password
      dbname: dummy
      schema: dummy
      threads: 4
  # which entry under `outputs` to use
  target: dev
38 changes: 38 additions & 0 deletions performance/projects/01_dummy_project/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# Package name: use only lowercase characters and underscores. A good
# name reflects your organization or the intended use of these models.
name: "my_new_package"
version: '1.0.0'
config-version: 2

# The "profile" dbt uses for this project. Profiles hold the database
# connection information and live in the ~/.dbt/profiles.yml file.
profile: "default"

# Where dbt looks for each type of file. `source-paths`, for example,
# says that source models are found in the "models/" directory.
# You probably won't need to change these!
source-paths:
  - models
analysis-paths:
  - analysis
test-paths:
  - tests
data-paths:
  - data
macro-paths:
  - macros

# directory which will store compiled SQL files
target-path: 'target'
# directories to be removed by `dbt clean`
clean-targets:
  - 'target'
  - 'dbt_modules'

# Configurations for models in the `source-paths` directory go here.
# They can enable or disable models, change how they are materialized,
# and more!

# This example config builds all models in the example/ directory as
# views (the default). Individual model files can override it with the
# `{{ config(...) }}` macro.
models:
  my_new_package:
    # Applies to all files under models/example/
    example:
      materialized: view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- a single literal row
select
    1 as id
11 changes: 11 additions & 0 deletions performance/projects/01_dummy_project/models/path_0/node_0.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tests on node_0.id: unique, not_null, and a relationships check
# against node_0.
version: 2

models:
  - name: node_0
    columns:
      - name: id
        tests:
          - unique
          - not_null
          - relationships:
              to: node_0
              field: id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- one literal row followed by every row of node_0
select
    1 as id
union all
select *
from {{ ref('node_0') }}
11 changes: 11 additions & 0 deletions performance/projects/01_dummy_project/models/path_0/node_1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tests on node_1.id: unique, not_null, and a relationships check
# against node_0.
version: 2

models:
  - name: node_1
    columns:
      - name: id
        tests:
          - unique
          - not_null
          - relationships:
              to: node_0
              field: id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- one literal row followed by every row of node_0
select
    1 as id
union all
select *
from {{ ref('node_0') }}
11 changes: 11 additions & 0 deletions performance/projects/01_dummy_project/models/path_0/node_2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tests on node_2.id: unique, not_null, and a relationships check
# against node_0.
version: 2

models:
  - name: node_2
    columns:
      - name: id
        tests:
          - unique
          - not_null
          - relationships:
              to: node_0
              field: id
38 changes: 38 additions & 0 deletions performance/projects/02_dummy_project/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# Package name: use only lowercase characters and underscores. A good
# name reflects your organization or the intended use of these models.
name: "my_new_package"
version: '1.0.0'
config-version: 2

# The "profile" dbt uses for this project. Profiles hold the database
# connection information and live in the ~/.dbt/profiles.yml file.
profile: "default"

# Where dbt looks for each type of file. `source-paths`, for example,
# says that source models are found in the "models/" directory.
# You probably won't need to change these!
source-paths:
  - models
analysis-paths:
  - analysis
test-paths:
  - tests
data-paths:
  - data
macro-paths:
  - macros

# directory which will store compiled SQL files
target-path: 'target'
# directories to be removed by `dbt clean`
clean-targets:
  - 'target'
  - 'dbt_modules'

# Configurations for models in the `source-paths` directory go here.
# They can enable or disable models, change how they are materialized,
# and more!

# This example config builds all models in the example/ directory as
# views (the default). Individual model files can override it with the
# `{{ config(...) }}` macro.
models:
  my_new_package:
    # Applies to all files under models/example/
    example:
      materialized: view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- a single literal row
select
    1 as id
11 changes: 11 additions & 0 deletions performance/projects/02_dummy_project/models/path_0/node_0.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tests on node_0.id: unique, not_null, and a relationships check
# against node_0.
version: 2

models:
  - name: node_0
    columns:
      - name: id
        tests:
          - unique
          - not_null
          - relationships:
              to: node_0
              field: id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- one literal row followed by every row of node_0
select
    1 as id
union all
select *
from {{ ref('node_0') }}
11 changes: 11 additions & 0 deletions performance/projects/02_dummy_project/models/path_0/node_1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tests on node_1.id: unique, not_null, and a relationships check
# against node_0.
version: 2

models:
  - name: node_1
    columns:
      - name: id
        tests:
          - unique
          - not_null
          - relationships:
              to: node_0
              field: id
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- one literal row followed by every row of node_0
select
    1 as id
union all
select *
from {{ ref('node_0') }}
Loading

0 comments on commit 1a98460

Please sign in to comment.