dbt-labs · nathaniel-may · Aug 11, 2021 · Jul 20, 2021 · Jul 20, 2021 · Jul 21, 2021
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -0,0 +1,181 @@
+
+name: Performance Regression Testing
+# Triggers: pull requests (temporary), a twice-daily schedule, and manual dispatch
+on:
+  # TODO: this pull_request trigger is only here while the workflow is being developed
+  pull_request:
+    branches:
+      - 'develop'
+      - 'performance-regression-testing'
+  schedule:
+    # runs twice a day, at 10:05 and 22:05 UTC (GitHub evaluates cron schedules in UTC)
+    - cron: '5 10,22 * * *'
+  # allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+
+  # verifies the formatting of the runner's Rust code
+  # deliberately not listed in any other job's `needs`:
+  # a failure will block merging, but not prevent developing
+  fmt:
+    runs-on: ubuntu-latest
+    name: Cargo fmt
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          profile: minimal
+          override: true
+      - run: rustup component add rustfmt
+      - uses: actions-rs/cargo@v1
+        with:
+          args: --manifest-path performance/runner/Cargo.toml --all -- --check
+          command: fmt
+
+  # runs the tests that ship with the runner
+  # these tests make sure the runner logic is correct
+  test-runner:
+    runs-on: ubuntu-latest
+    name: Test Runner
+    env:
+      # turns compiler warnings into errors
+      RUSTFLAGS: '-D warnings'
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          profile: minimal
+          override: true
+      - uses: actions-rs/cargo@v1
+        with:
+          args: --manifest-path performance/runner/Cargo.toml
+          command: test
+
+  # compiles the runner in release mode and uploads it as the `runner` artifact for later jobs
+  build-runner:
+    name: Build Runner
+    needs: [test-runner]
+    runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: '-D warnings'  # deny warnings: any compiler warning fails the build
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          profile: minimal
+          override: true
+      - uses: actions-rs/cargo@v1
+        with:
+          args: --release --manifest-path performance/runner/Cargo.toml
+          command: build
+      - uses: actions/upload-artifact@v2
+        with:
+          path: performance/runner/target/release/runner
+          name: runner
+
+  # measures dbt as installed from the checked-out (current or default) branch
+  measure-dev:
+    name: Measure Dev Branch
+    needs: [build-runner]
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout dev
+        uses: actions/checkout@v2
+      - name: Setup Python
+        uses: actions/setup-python@v2.2.2
+        with:
+          python-version: "3.8"
+      - name: install dbt
+        run: pip install -r dev-requirements.txt -r editable-requirements.txt
+      - name: install hyperfine
+        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
+      - uses: actions/download-artifact@v2  # the binary uploaded by build-runner
+        with:
+          name: runner
+      - name: change permissions
+        run: chmod +x ./runner  # uploaded artifacts do not keep their executable bit
+      - name: run
+        run: ./runner measure -b dev -p ${{ github.workspace }}/performance/projects/
+      - uses: actions/upload-artifact@v2
+        with:
+          path: performance/results/
+          name: dev-results
+
+  # Measures the release branch that serves as our performance baseline.
+  # Measuring is by far the slowest part of this job, so every setup step
+  # runs before it and the job fails fast.
+  # -----
+  # dbt is checked out twice in this job: once for the baseline dbt version,
+  # and once to get the latest regression testing projects, metrics, and
+  # runner code from the develop or current branch, so that the calculations
+  # match for both versions of dbt we are comparing.
+  measure-baseline:
+    needs: [build-runner]
+    name: Measure Baseline Branch
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout latest
+        uses: actions/checkout@v2
+        with:
+          ref: '0.20.latest'  # hardcoded baseline branch
+      - name: Setup Python
+        uses: actions/setup-python@v2.2.2
+        with:
+          python-version: '3.8'
+      - name: move repo up a level
+        run: mkdir ${{ github.workspace }}/../baseline/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../baseline
+      - name: "[debug] ls new dbt location"
+        run: ls ${{ github.workspace }}/../baseline/dbt/
+      # the editable install creates egg-links back to the source tree, so the copied source must be kept
+      - name: install dbt from new location
+        run: cd ${{ github.workspace }}/../baseline/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt
+      # check out the current branch to get all the target projects;
+      # this deletes the previously checked-out code, which is why it was copied beforehand
+      - name: checkout dev
+        uses: actions/checkout@v2
+      - name: install hyperfine
+        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
+      - uses: actions/download-artifact@v2
+        with:
+          name: runner
+      - name: change permissions
+        run: chmod +x ./runner
+      - name: run runner
+        run: ./runner measure -b baseline -p ${{ github.workspace }}/performance/projects/
+      - uses: actions/upload-artifact@v2
+        with:
+          name: baseline-results
+          path: performance/results/
+
+  # compares the measurements taken on the two branches and looks for regressions.
+  # Exits with non-zero code if a regression is detected.
+  calculate-regressions:
+    name: Compare Results
+    needs: [measure-dev, measure-baseline]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v2
+        with:
+          name: dev-results
+      - uses: actions/download-artifact@v2
+        with:
+          name: baseline-results
+      - name: '[debug] ls result files'
+        run: ls
+      - uses: actions/download-artifact@v2
+        with:
+          name: runner
+      - name: change permissions
+        run: chmod +x ./runner
+      - name: run calculation
+        run: ./runner calculate -r ./
+      # always attempt to upload the results, even if regressions were found
+      - if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          path: ./final_calculations.json
+          name: final-calculations
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - Handle whitespace after a plus sign on the project config ([#3526](https://github.com/dbt-labs/dbt/pull/3526))
 
 ### Under the hood
+- Add performance regression testing ([#3602](https://github.com/dbt-labs/dbt/pull/3602))
 - Improve default view and table materialization performance by checking relational cache before attempting to drop temp relations ([#3112](https://github.com/fishtown-analytics/dbt/issues/3112), [#3468](https://github.com/fishtown-analytics/dbt/pull/3468))
 - Add optional `sslcert`, `sslkey`, and `sslrootcert` profile arguments to the Postgres connector. ([#3472](https://github.com/fishtown-analytics/dbt/pull/3472), [#3473](https://github.com/fishtown-analytics/dbt/pull/3473))
 - Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536))

diff --git a/performance/README.md b/performance/README.md
@@ -0,0 +1,16 @@
+# Performance Regression Testing
+This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run twice a day, but it can also be triggered manually.
+
+The GitHub workflow hardcodes our baseline branch for performance metrics as `0.20.latest`. As future versions become faster, this branch will be updated to hold us to those new standards.
+
+## Adding a new dbt project
+Just make a new directory under `performance/projects/`. It will automatically be picked up by the tests.
+
+## Adding a new dbt command
+In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The GitHub Action will handle recompilation if you don't have the Rust toolchain installed.
+
+## Future work
+- add more projects to test different configurations that have been known bottlenecks
+- add more dbt commands to measure
+- possibly use the uploaded JSON artifacts to store these results so they can be graphed over time
+- read new metrics from a file so no one has to edit Rust source to add them to the suite
diff --git a/performance/project_config/.user.yml b/performance/project_config/.user.yml
@@ -0,0 +1 @@
+id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b
diff --git a/performance/project_config/profiles.yml b/performance/project_config/profiles.yml
@@ -0,0 +1,12 @@
+default:
+  outputs:
+    dev:
+      type: postgres
+      host: localhost
+      port: 5432
+      dbname: dummy
+      schema: dummy
+      user: dummy
+      password: dummy_password
+      threads: 4
+  target: dev
diff --git a/performance/projects/01_dummy_project/dbt_project.yml b/performance/projects/01_dummy_project/dbt_project.yml
@@ -0,0 +1,38 @@
+
+# Name your package! Package names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: 'my_new_package'
+version: 1.0.0
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project. Profiles contain
+# database connection information, and should be configured in the ~/.dbt/profiles.yml file
+profile: 'default'
+
+# These configurations specify where dbt should look for different types of files.
+# The `source-paths` config, for example, states that source models can be found
+# in the "models/" directory. You probably won't need to change these!
+source-paths: ["models"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+data-paths: ["data"]
+macro-paths: ["macros"]
+
+target-path: "target"  # directory which will store compiled SQL files
+clean-targets:  # directories to be removed by `dbt clean`
+  - "target"
+  - "dbt_modules"
+
+# You can define configurations for models in the `source-paths` directory here.
+# Using these configurations, you can enable or disable models, change how they
+# are materialized, and more!
+
+# In this example config, we tell dbt to build all models in the example/ directory
+# as views (the default). These settings can be overridden in the individual model files
+# using the `{{ config(...) }}` macro.
+models:
+  my_new_package:
+    # Applies to all files under models/example/ (NOTE(review): only models/path_0/ is visible here — confirm)
+    example:
+      materialized: view
diff --git a/performance/projects/01_dummy_project/models/path_0/node_0.sql b/performance/projects/01_dummy_project/models/path_0/node_0.sql
@@ -0,0 +1 @@
+select 1 as id
diff --git a/performance/projects/01_dummy_project/models/path_0/node_0.yml b/performance/projects/01_dummy_project/models/path_0/node_0.yml
@@ -0,0 +1,11 @@
+version: 2
+models:
+  - name: node_0
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: node_0
+              field: id
diff --git a/performance/projects/01_dummy_project/models/path_0/node_1.sql b/performance/projects/01_dummy_project/models/path_0/node_1.sql
@@ -0,0 +1,3 @@
+select 1 as id
+union all
+select * from {{ ref('node_0') }}
diff --git a/performance/projects/01_dummy_project/models/path_0/node_1.yml b/performance/projects/01_dummy_project/models/path_0/node_1.yml
@@ -0,0 +1,11 @@
+version: 2
+models:
+  - name: node_1
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: node_0
+              field: id
diff --git a/performance/projects/01_dummy_project/models/path_0/node_2.sql b/performance/projects/01_dummy_project/models/path_0/node_2.sql
@@ -0,0 +1,3 @@
+select 1 as id
+union all
+select * from {{ ref('node_0') }}
diff --git a/performance/projects/01_dummy_project/models/path_0/node_2.yml b/performance/projects/01_dummy_project/models/path_0/node_2.yml
@@ -0,0 +1,11 @@
+version: 2
+models:
+  - name: node_2
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: node_0
+              field: id
diff --git a/performance/projects/02_dummy_project/dbt_project.yml b/performance/projects/02_dummy_project/dbt_project.yml
@@ -0,0 +1,38 @@
+
+# Name your package! Package names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: 'my_new_package'
+version: 1.0.0
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project. Profiles contain
+# database connection information, and should be configured in the ~/.dbt/profiles.yml file
+profile: 'default'
+
+# These configurations specify where dbt should look for different types of files.
+# The `source-paths` config, for example, states that source models can be found
+# in the "models/" directory. You probably won't need to change these!
+source-paths: ["models"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+data-paths: ["data"]
+macro-paths: ["macros"]
+
+target-path: "target"  # directory which will store compiled SQL files
+clean-targets:  # directories to be removed by `dbt clean`
+  - "target"
+  - "dbt_modules"
+
+# You can define configurations for models in the `source-paths` directory here.
+# Using these configurations, you can enable or disable models, change how they
+# are materialized, and more!
+
+# In this example config, we tell dbt to build all models in the example/ directory
+# as views (the default). These settings can be overridden in the individual model files
+# using the `{{ config(...) }}` macro.
+models:
+  my_new_package:
+    # Applies to all files under models/example/ (NOTE(review): only models/path_0/ is visible here — confirm)
+    example:
+      materialized: view
diff --git a/performance/projects/02_dummy_project/models/path_0/node_0.sql b/performance/projects/02_dummy_project/models/path_0/node_0.sql
@@ -0,0 +1 @@
+select 1 as id
diff --git a/performance/projects/02_dummy_project/models/path_0/node_0.yml b/performance/projects/02_dummy_project/models/path_0/node_0.yml
@@ -0,0 +1,11 @@
+version: 2
+models:
+  - name: node_0
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: node_0
+              field: id
diff --git a/performance/projects/02_dummy_project/models/path_0/node_1.sql b/performance/projects/02_dummy_project/models/path_0/node_1.sql
@@ -0,0 +1,3 @@
+select 1 as id
+union all
+select * from {{ ref('node_0') }}
diff --git a/performance/projects/02_dummy_project/models/path_0/node_1.yml b/performance/projects/02_dummy_project/models/path_0/node_1.yml
@@ -0,0 +1,11 @@
+version: 2
+models:
+  - name: node_1
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: node_0
+              field: id
diff --git a/performance/projects/02_dummy_project/models/path_0/node_2.sql b/performance/projects/02_dummy_project/models/path_0/node_2.sql
@@ -0,0 +1,3 @@
+select 1 as id
+union all
+select * from {{ ref('node_0') }}
diff --git a/performance/projects/02_dummy_project/models/path_0/node_2.yml b/performance/projects/02_dummy_project/models/path_0/node_2.yml
@@ -0,0 +1,11 @@
+version: 2
+models:
+  - name: node_2
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: node_0
+              field: id