From 9f62ec215344e7aa09327d4cd3328381b898092e Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 20 Jul 2021 09:28:57 -0400 Subject: [PATCH 01/81] add rust performance runner --- performance/README.md | 13 +++++++ performance/results/.gitignore | 5 +++ performance/runner/.gitignore | 2 + performance/runner/Cargo.lock | 7 ++++ performance/runner/Cargo.toml | 8 ++++ performance/runner/src/main.rs | 70 ++++++++++++++++++++++++++++++++++ 6 files changed, 105 insertions(+) create mode 100644 performance/README.md create mode 100644 performance/results/.gitignore create mode 100644 performance/runner/.gitignore create mode 100644 performance/runner/Cargo.lock create mode 100644 performance/runner/Cargo.toml create mode 100644 performance/runner/src/main.rs diff --git a/performance/README.md b/performance/README.md new file mode 100644 index 00000000000..8b9ce3058ab --- /dev/null +++ b/performance/README.md @@ -0,0 +1,13 @@ +# Performance Testing +This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually. + +## Adding a new project +Just make a new directory under projects. It will automatically be picked up by the tests. + +## Adding a new dbt command +In `runner/src/main.rs` add a metric to the `metrics` Vec in the main function. The Github Action will handle recompilation. + +## Future work +- add more projects to test different configurations that have been known bottlenecks +- add more dbt commands to measure +- consider storing these results so they can be graphed over time diff --git a/performance/results/.gitignore b/performance/results/.gitignore new file mode 100644 index 00000000000..b8e28556b4b --- /dev/null +++ b/performance/results/.gitignore @@ -0,0 +1,5 @@ +# all files here are generated results +* + +# except this one +!.gitignore diff --git a/performance/runner/.gitignore b/performance/runner/.gitignore new file mode 100644 index 00000000000..054a2acc8a4 --- /dev/null +++ b/performance/runner/.gitignore @@ -0,0 +1,2 @@ +target/ +projects/*/logs diff --git a/performance/runner/Cargo.lock b/performance/runner/Cargo.lock new file mode 100644 index 00000000000..0832b7312b5 --- /dev/null +++ b/performance/runner/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
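A note on the README's "Adding a new dbt command" instruction above: it corresponds to a one-line addition to the `metrics` Vec in `runner/src/main.rs` (shown in full further below). As an illustrative sketch only, a hypothetical `compile` metric added next to the existing `parse` entry might look like this; the `compile` entry is not part of this patch.

    let metrics: Vec<Metric> = vec![
        Metric { name: "parse", prepare: "rm -rf target/", cmd: "dbt parse --no-version-check" },
        // hypothetical second command to measure, added per the README instructions:
        Metric { name: "compile", prepare: "rm -rf target/", cmd: "dbt compile --no-version-check" },
    ];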
+version = 3 + +[[package]] +name = "runner" +version = "0.1.0" diff --git a/performance/runner/Cargo.toml b/performance/runner/Cargo.toml new file mode 100644 index 00000000000..77ff9695b77 --- /dev/null +++ b/performance/runner/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "runner" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs new file mode 100644 index 00000000000..31420ffe923 --- /dev/null +++ b/performance/runner/src/main.rs @@ -0,0 +1,70 @@ +use std::{env, fs, io}; +use std::process::{Command, ExitStatus, exit}; + + +#[derive(Debug, Clone)] +struct Metric<'a> { + name: &'a str, + prepare: &'a str, + cmd: &'a str, +} + +impl Metric<'_> { + fn outfile(&self, project: &str) -> String { + [self.name, "_", project, ".json"].join("") + } +} + +fn main() { + // TODO args lib + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("please provide the git project directory root"); + exit(1); + } + let git_directory = &args[1]; + + // to add a new metric to the test suite, simply define it in this list: + let metrics: Vec = vec![ + Metric { name:"parse", prepare: "rm -rf target/", cmd: "dbt parse --no-version-check" }, + ]; + + // list out all projects + let project_entries = fs::read_dir([&git_directory, "performance/projects"].join("")).unwrap(); + + let results: Result, io::Error > = project_entries.map(|entry| { + metrics.clone().into_iter().map(|metric| { + let path = entry.as_ref().unwrap().path(); + let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); + + Command::new("hyperfine") + .current_dir(&path) + .arg("--prepare") + .arg(metric.prepare) + .arg(metric.cmd) + .arg("--export-json") + .arg([&git_directory, "/performance/results/", &metric.outfile(project_name)].join("")) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. + .arg("--show-output") + .status() // use spawn() here instead for more information + }).collect::>>() + }).flatten().collect(); + + // only exit with status code 0 if everything ran as expected + match results { + // if dispatch of any of the commands failed, panic with that error + Err(e) => panic!("{}", e), + Ok(statuses) => { + for status in statuses { + match status.code() { + None => (), + Some(0) => (), + // if any child command exited with a non zero status code, exit with the same one here. 
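Worth noting for readers new to Rust: the `results` value above relies on standard-library behavior where an iterator of `Result`s can be collected into a single `Result<Vec<_>, _>` that short-circuits at the first `Err`. A self-contained sketch of that behavior, not part of the patch; a test like this could sit alongside the runner code and be picked up by `cargo test`:

    #[test]
    fn collecting_results_short_circuits() {
        // every Ok value is gathered into one Ok(Vec<_>) ...
        let all_ok: Result<Vec<i32>, String> = vec![Ok(1), Ok(2)].into_iter().collect();
        assert_eq!(all_ok, Ok(vec![1, 2]));

        // ... but a single Err makes the whole collection that Err.
        let has_err: Result<Vec<i32>, String> =
            vec![Ok(1), Err("boom".to_string()), Ok(3)].into_iter().collect();
        assert_eq!(has_err, Err("boom".to_string()));
    }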
+ Some(nonzero) => exit(nonzero), + } + } + // everything ran as expected + exit(0); + }, + } +} From 9e3da391a787f42e2460bc2350a4c9970a59d6a3 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 20 Jul 2021 14:02:10 -0400 Subject: [PATCH 02/81] draft of regression testing workflow --- .github/workflows/performance.yml | 140 ++++++++++++++++++ .../dbt_project.yml | 38 +++++ .../models/path_0/node_0.sql | 1 + .../models/path_0/node_0.yml | 11 ++ .../models/path_0/node_1.sql | 3 + .../models/path_0/node_1.yml | 11 ++ .../models/path_0/node_2.sql | 3 + .../models/path_0/node_2.yml | 11 ++ .../dbt_project.yml | 38 +++++ .../models/path_0/node_0.sql | 1 + .../models/path_0/node_0.yml | 11 ++ .../models/path_0/node_1.sql | 3 + .../models/path_0/node_1.yml | 11 ++ .../models/path_0/node_2.sql | 3 + .../models/path_0/node_2.yml | 11 ++ performance/runner/src/main.rs | 10 +- 16 files changed, 302 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/performance.yml create mode 100644 performance/projects/01_mini_project_dont_merge/dbt_project.yml create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml create mode 100644 performance/projects/02_mini_project_dont_merge/dbt_project.yml create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml new file mode 100644 index 00000000000..5053f8ad9a5 --- /dev/null +++ b/.github/workflows/performance.yml @@ -0,0 +1,140 @@ + +# Schedule triggers +on: + # TODO this is just while developing + pull_request: + branches: + - 'develop' + - 'performance-regression-testing' + schedule: + # runs every day at 10:05pm + - cron: '5 22 * * *' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + + # runs any tests associated with the runner + # these tests make sure the runner logic is correct + # if there are no tests, it still quickly builds the project to fail fast + test-rust: + name: Test Runner + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + args: --manifest-path performance/runner/Cargo.toml + + # build an optimized binary to be used as the runner in later steps + build-rust: + needs: [test-rust] + name: Build Runner + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-D warnings" + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: 
stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: build + args: --release --manifest-path performance/runner/Cargo.toml + - uses: actions/upload-artifact@v2 + with: + name: runner + path: performance/runner/target/release/runner + + # run the performance measurements on the current or default branch + measure-dev: + needs: [build-rust] + name: Measure Dev Branch + runs-on: ubuntu-latest + steps: + - name: checkout dev + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + - name: install dbt + run: pip install -r dev-requirements.txt -r editable-requirements.txt + - name: install hyperfine + run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb + - uses: actions/download-artifact@v2 + with: + name: runner + - name: change permissions + run: chmod +x ./runner + - name: run + run: ./runner ${{ github.workspace }}/performance/projects/ + - uses: actions/upload-artifact@v2 + with: + name: dev-results + path: performance/results/ + + # run the performance measurements on the latest release branch + measure-latest: + needs: [build-rust] + name: Measure Latest Branch + runs-on: ubuntu-latest + steps: + - name: checkout latest + uses: actions/checkout@v2 + with: + ref: '0.20.latest' + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + - name: move repo up a level + run: mkdir ${{ github.workspace }}/../latest/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../latest + - name: debug line + run: ls ${{ github.workspace }}/../latest/dbt/ + # installation creates egg-links so we have to preserve source + - name: install dbt from new location + run: cd ${{ github.workspace }}/../latest/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt + # checkout the current branch to get all the target projects + # this deletes the old checked out code which is why we had to copy before + - name: checkout dev + uses: actions/checkout@v2 + - name: install hyperfine + run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb + - uses: actions/download-artifact@v2 + with: + name: runner + - name: change permissions + run: chmod +x ./runner + - name: run runner + run: ./runner ${{ github.workspace }}/performance/projects/ + - uses: actions/upload-artifact@v2 + with: + name: latest-results + path: performance/results/ + + compare-results: + needs: [measure-dev, measure-latest] + name: Compare Results + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v2 + with: + name: dev-results + - name: ls result files + run: ls + - uses: actions/download-artifact@v2 + with: + name: latest-results + - name: ls result files + run: ls + # TODO actually compare diff --git a/performance/projects/01_mini_project_dont_merge/dbt_project.yml b/performance/projects/01_mini_project_dont_merge/dbt_project.yml new file mode 100644 index 00000000000..95e1020def8 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'my_new_package' +version: 1.0.0 +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. 
Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'default' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! + +# In this example config, we tell dbt to build all models in the example/ directory +# as views (the default). These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. +models: + my_new_package: + # Applies to all files under models/example/ + example: + materialized: view diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql new file mode 100644 index 00000000000..26d9cae7b5b --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql @@ -0,0 +1 @@ +select 1 as id \ No newline at end of file diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml new file mode 100644 index 00000000000..282e56882f0 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_0 +version: 2 diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml new file mode 100644 index 00000000000..2899ddf532f --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_1 +version: 2 diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml new file mode 100644 index 00000000000..d2582eabbd8 --- /dev/null +++ 
b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_2 +version: 2 diff --git a/performance/projects/02_mini_project_dont_merge/dbt_project.yml b/performance/projects/02_mini_project_dont_merge/dbt_project.yml new file mode 100644 index 00000000000..95e1020def8 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'my_new_package' +version: 1.0.0 +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'default' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! + +# In this example config, we tell dbt to build all models in the example/ directory +# as views (the default). These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. 
+models: + my_new_package: + # Applies to all files under models/example/ + example: + materialized: view diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql new file mode 100644 index 00000000000..26d9cae7b5b --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql @@ -0,0 +1 @@ +select 1 as id \ No newline at end of file diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml new file mode 100644 index 00000000000..282e56882f0 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_0 +version: 2 diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml new file mode 100644 index 00000000000..2899ddf532f --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_1 +version: 2 diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml new file mode 100644 index 00000000000..d2582eabbd8 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_2 +version: 2 diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 31420ffe923..2648be5f061 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,4 +1,5 @@ use std::{env, fs, io}; +use std::path::Path; use std::process::{Command, ExitStatus, exit}; @@ -19,10 +20,10 @@ fn main() { // TODO args lib let args: Vec = env::args().collect(); if args.len() < 2 { - println!("please provide the git project directory root"); + println!("please provide the target projects directory"); exit(1); } - let git_directory = &args[1]; + let projects_directory = &args[1]; // to add a new metric to the test suite, simply define it in this list: let metrics: Vec = vec![ @@ -30,7 +31,8 @@ fn main() { ]; // list out all projects - let project_entries = fs::read_dir([&git_directory, "performance/projects"].join("")).unwrap(); + let path = 
Path::new(&projects_directory); + let project_entries = fs::read_dir(path).unwrap(); let results: Result, io::Error > = project_entries.map(|entry| { metrics.clone().into_iter().map(|metric| { @@ -43,7 +45,7 @@ fn main() { .arg(metric.prepare) .arg(metric.cmd) .arg("--export-json") - .arg([&git_directory, "/performance/results/", &metric.outfile(project_name)].join("")) + .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. .arg("--show-output") .status() // use spawn() here instead for more information From ee58d27d94cf13c6e3e2b8f2bf2bb17327396f5e Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 17:28:28 -0400 Subject: [PATCH 03/81] use a profiles.yml file --- performance/projects/config/profiles.yml | 12 ++++++++++++ performance/runner/src/main.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 performance/projects/config/profiles.yml diff --git a/performance/projects/config/profiles.yml b/performance/projects/config/profiles.yml new file mode 100644 index 00000000000..7b1c39a1f9e --- /dev/null +++ b/performance/projects/config/profiles.yml @@ -0,0 +1,12 @@ +default: + target: dev + outputs: + dev: + type: postgres + host: localhost + user: dummy + password: dummy_password + port: 5432 + dbname: dummy + schema: dummy + threads: 4 \ No newline at end of file diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 2648be5f061..3cb91e991bd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -43,7 +43,7 @@ fn main() { .current_dir(&path) .arg("--prepare") .arg(metric.prepare) - .arg(metric.cmd) + .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/config/"].join("")) .arg("--export-json") .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. From d0773f33460b1c0bff7595c478bceefa4d26419d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 17:34:22 -0400 Subject: [PATCH 04/81] warmup fs caches --- performance/runner/src/main.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 3cb91e991bd..31a61801986 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -41,6 +41,9 @@ fn main() { Command::new("hyperfine") .current_dir(&path) + // warms filesystem caches by running the command first without counting it. 
+ // alternatively we could clear them before each run + .arg("--warmup 1") .arg("--prepare") .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/config/"].join("")) From 4ff3f6d4e84b7cb4c0184da2f207d181a8f51449 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 17:35:54 -0400 Subject: [PATCH 05/81] moved config dir out of projects dir --- performance/{projects/config => project_config}/profiles.yml | 0 performance/runner/src/main.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename performance/{projects/config => project_config}/profiles.yml (100%) diff --git a/performance/projects/config/profiles.yml b/performance/project_config/profiles.yml similarity index 100% rename from performance/projects/config/profiles.yml rename to performance/project_config/profiles.yml diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 31a61801986..eaddd2363cd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -46,7 +46,7 @@ fn main() { .arg("--warmup 1") .arg("--prepare") .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/config/"].join("")) + .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/../project_config/"].join("")) .arg("--export-json") .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. From 75d3d87d64a47f6e7cfb90bda761f6afa7a4eaf4 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 18:00:07 -0400 Subject: [PATCH 06/81] fix warmup syntax --- performance/runner/src/main.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index eaddd2363cd..b57538bdefd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -43,7 +43,8 @@ fn main() { .current_dir(&path) // warms filesystem caches by running the command first without counting it. // alternatively we could clear them before each run - .arg("--warmup 1") + .arg("--warmup") + .arg("1") .arg("--prepare") .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/../project_config/"].join("")) @@ -58,7 +59,7 @@ fn main() { // only exit with status code 0 if everything ran as expected match results { // if dispatch of any of the commands failed, panic with that error - Err(e) => panic!("{}", e), + Err(e) => panic!("{:?}", e), Ok(statuses) => { for status in statuses { match status.code() { From 1d7e8349ede8514218968c800660f14898efd312 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 22 Jul 2021 12:01:30 -0400 Subject: [PATCH 07/81] draft of comparison --- performance/comparison/Cargo.lock | 89 ++++++++++++++++++++++++++++++ performance/comparison/Cargo.toml | 8 +++ performance/comparison/src/main.rs | 53 ++++++++++++++++++ 3 files changed, 150 insertions(+) create mode 100644 performance/comparison/Cargo.lock create mode 100644 performance/comparison/Cargo.toml create mode 100644 performance/comparison/src/main.rs diff --git a/performance/comparison/Cargo.lock b/performance/comparison/Cargo.lock new file mode 100644 index 00000000000..4bf9976c9a0 --- /dev/null +++ b/performance/comparison/Cargo.lock @@ -0,0 +1,89 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
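A brief note on the "fix warmup syntax" commit above: `std::process::Command` does no shell word-splitting, so each `.arg(...)` call becomes exactly one argv entry in the child process. Passing `"--warmup 1"` as a single arg therefore reaches hyperfine as one token instead of a flag followed by its value, which is why the fix splits it into two `.arg` calls. A minimal standalone illustration, not part of the patch:

    use std::process::Command;

    fn main() {
        // one argv entry: the child would receive the single token "--warmup 1",
        // which a CLI parser does not treat as a flag plus a value
        let mut broken = Command::new("hyperfine");
        broken.arg("--warmup 1");

        // two argv entries: the child receives "--warmup" and "1" separately,
        // which is what the commit above switches to
        let mut fixed = Command::new("hyperfine");
        fixed.arg("--warmup").arg("1");

        // neither command is spawned here; this only illustrates how argv is built
        let _ = (broken, fixed);
    }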
+version = 3 + +[[package]] +name = "comparison" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "proc-macro2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" diff --git a/performance/comparison/Cargo.toml b/performance/comparison/Cargo.toml new file mode 100644 index 00000000000..df15857b46e --- /dev/null +++ b/performance/comparison/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "comparison" +version = "0.1.0" +edition = "2018" + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs new file mode 100644 index 00000000000..e3b7335c3cb --- /dev/null +++ b/performance/comparison/src/main.rs @@ -0,0 +1,53 @@ +use serde::{Deserialize, Serialize}; + + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Measurement { + command: String, + mean: f64, + stddev: f64, + median: f64, + user: f64, + system: f64, + min: f64, + max: f64, + times: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Measurements { + results: Vec, +} + +fn main() { + let data = r#"{ +"results": [ + { + "command": "dbt parse --no-version-check", + "mean": 3.050361809595, + "stddev": 0.04615108237908544, + "median": 3.034926999095, + "user": 2.7580889199999996, + "system": 0.22864483500000002, + "min": 3.005364834595, + "max": 3.145752726595, + "times": [ + 3.023607223595, + 3.0423504165949997, + 3.145752726595, + 3.114517602595, + 3.062246401595, + 3.051930277595, + 3.027503581595, + 3.013381462595, 
+ 3.005364834595, + 3.016963568595 + ] + } +] +}"#; + + let measurements: Measurements = serde_json::from_str(data).unwrap(); + + println!("{:?}", measurements); +} \ No newline at end of file From a29367b7fe50e6b87a8eac5b717162e183da67f4 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 22 Jul 2021 12:30:35 -0400 Subject: [PATCH 08/81] read files and parse json --- performance/comparison/src/main.rs | 62 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index e3b7335c3cb..e42d879481c 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -1,5 +1,8 @@ use serde::{Deserialize, Serialize}; - +use serde_json::Result; +use std::path::Path; +use std::process::exit; +use std::{env, fs}; #[derive(Debug, Clone, Serialize, Deserialize)] struct Measurement { @@ -20,34 +23,33 @@ struct Measurements { } fn main() { - let data = r#"{ -"results": [ - { - "command": "dbt parse --no-version-check", - "mean": 3.050361809595, - "stddev": 0.04615108237908544, - "median": 3.034926999095, - "user": 2.7580889199999996, - "system": 0.22864483500000002, - "min": 3.005364834595, - "max": 3.145752726595, - "times": [ - 3.023607223595, - 3.0423504165949997, - 3.145752726595, - 3.114517602595, - 3.062246401595, - 3.051930277595, - 3.027503581595, - 3.013381462595, - 3.005364834595, - 3.016963568595 - ] + // TODO args lib + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("please provide the results directory"); + exit(1); } -] -}"#; - - let measurements: Measurements = serde_json::from_str(data).unwrap(); + let results_directory = &args[1]; + + let path = Path::new(&results_directory); + let result_files = fs::read_dir(path).unwrap(); + let x: Result> = result_files + .into_iter() + .map(|f| { + f.unwrap().path() + }) + .filter(|filename| { + filename + .extension() + .and_then(|ext| ext.to_str()) + .map_or(false, |ext| ext.ends_with("json")) + }) + .map(|filename| { + println!("{:?}", filename); + let contents = fs::read_to_string(filename).unwrap(); + serde_json::from_str::(&contents) + }) + .collect(); - println!("{:?}", measurements); -} \ No newline at end of file + println!("{:?}", x); +} From 108b55bdc3e49266c55f83955c31ee4406d7d874 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 22 Jul 2021 13:50:17 -0400 Subject: [PATCH 09/81] add regression type --- performance/comparison/src/main.rs | 38 ++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index e42d879481c..7dbb3d91eb1 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -22,6 +22,28 @@ struct Measurements { results: Vec, } +struct Regression { + threshold: f64, + difference: f64, + base: Measurement, + latest: Measurement, +} + +fn regression(base: &Measurement, latest: &Measurement) -> Option { + let threshold = 1.05; // 5% regression threshold + let difference = latest.median / base.median; + if difference > threshold { + Some(Regression { + threshold: threshold, + difference: difference, + base: base.clone(), + latest: latest.clone(), + }) + } else { + None + } +} + fn main() { // TODO args lib let args: Vec = env::args().collect(); @@ -33,11 +55,9 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let x: Result> = result_files 
.into_iter() - .map(|f| { - f.unwrap().path() - }) + .map(|f| f.unwrap().path()) .filter(|filename| { filename .extension() @@ -47,9 +67,17 @@ fn main() { .map(|filename| { println!("{:?}", filename); let contents = fs::read_to_string(filename).unwrap(); - serde_json::from_str::(&contents) + let measurements: Result = serde_json::from_str(&contents); + // the way we're running these, the files will each contain exactly one measurement + measurements.map(|x| x.results[0].clone()) }) .collect(); + // TODO exit(1) when Regression is present + match x { + Err(e) => panic!("{}", e), + Ok(_) => (), + } + println!("{:?}", x); } From c0e2023c817979fa73d8a45848a8736166bcc76c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 15:52:28 -0400 Subject: [PATCH 10/81] update profiles dir path --- performance/runner/src/main.rs | 62 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index b57538bdefd..2f0eedb5ca4 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,7 +1,6 @@ -use std::{env, fs, io}; use std::path::Path; -use std::process::{Command, ExitStatus, exit}; - +use std::process::{exit, Command, ExitStatus}; +use std::{env, fs, io}; #[derive(Debug, Clone)] struct Metric<'a> { @@ -26,35 +25,44 @@ fn main() { let projects_directory = &args[1]; // to add a new metric to the test suite, simply define it in this list: - let metrics: Vec = vec![ - Metric { name:"parse", prepare: "rm -rf target/", cmd: "dbt parse --no-version-check" }, - ]; + let metrics: Vec = vec![Metric { + name: "parse", + prepare: "rm -rf target/", + cmd: "dbt parse --no-version-check", + }]; // list out all projects let path = Path::new(&projects_directory); let project_entries = fs::read_dir(path).unwrap(); - let results: Result, io::Error > = project_entries.map(|entry| { - metrics.clone().into_iter().map(|metric| { - let path = entry.as_ref().unwrap().path(); - let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); + let results: Result, io::Error> = project_entries + .map(|entry| { + metrics + .clone() + .into_iter() + .map(|metric| { + let path = entry.as_ref().unwrap().path(); + let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); - Command::new("hyperfine") - .current_dir(&path) - // warms filesystem caches by running the command first without counting it. - // alternatively we could clear them before each run - .arg("--warmup") - .arg("1") - .arg("--prepare") - .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/../project_config/"].join("")) - .arg("--export-json") - .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) - // this prevents capture dbt output. Noisy, but good for debugging when tests fail. - .arg("--show-output") - .status() // use spawn() here instead for more information - }).collect::>>() - }).flatten().collect(); + Command::new("hyperfine") + .current_dir(&path) + // warms filesystem caches by running the command first without counting it. + // alternatively we could clear them before each run + .arg("--warmup") + .arg("1") + .arg("--prepare") + .arg(metric.prepare) + .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) + .arg("--export-json") + .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. 
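To make the 5% threshold in the "add regression type" commit above concrete, here is a small worked sketch with hypothetical medians; the numbers are illustrative only and the comparison mirrors the check inside the `regression` function:

    #[test]
    fn five_percent_threshold_examples() {
        // baseline median 3.00s vs latest median 3.20s: ratio ~1.067 > 1.05, so a Regression is reported
        assert!(3.20_f64 / 3.00_f64 > 1.05);

        // baseline median 3.00s vs latest median 3.10s: ratio ~1.033 <= 1.05, so no regression
        assert!(3.10_f64 / 3.00_f64 <= 1.05);
    }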
+ .arg("--show-output") + .status() // use spawn() here instead for more information + }) + .collect::>>() + }) + .flatten() + .collect(); // only exit with status code 0 if everything ran as expected match results { @@ -71,6 +79,6 @@ fn main() { } // everything ran as expected exit(0); - }, + } } } From 34e2c4f90bd120000ff7e06cf7a2cd1043d29c3c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 16:16:21 -0400 Subject: [PATCH 11/81] fix results output dir --- performance/runner/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 2f0eedb5ca4..5dd8d53928e 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -54,7 +54,7 @@ fn main() { .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) .arg("--export-json") - .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) + .arg(["../../results/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. .arg("--show-output") .status() // use spawn() here instead for more information From a45c9d019253054a55d7878b704bc2044dc9d3e1 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 16:24:46 -0400 Subject: [PATCH 12/81] add new arg for runner for branch name --- .github/workflows/performance.yml | 4 ++-- performance/runner/src/main.rs | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 5053f8ad9a5..478f12eda63 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -77,7 +77,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run - run: ./runner ${{ github.workspace }}/performance/projects/ + run: ./runner ${{ github.workspace }}/performance/projects/ dev - uses: actions/upload-artifact@v2 with: name: dev-results @@ -116,7 +116,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner ${{ github.workspace }}/performance/projects/ + run: ./runner ${{ github.workspace }}/performance/projects/ latest - uses: actions/upload-artifact@v2 with: name: latest-results diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 5dd8d53928e..86c8e01cdd1 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -10,19 +10,20 @@ struct Metric<'a> { } impl Metric<'_> { - fn outfile(&self, project: &str) -> String { - [self.name, "_", project, ".json"].join("") + fn outfile(&self, project: &str, branch: &str) -> String { + [branch, "_", self.name, "_", project, ".json"].join("") } } fn main() { // TODO args lib let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("please provide the target projects directory"); + if args.len() < 3 { + println!("please provide the target projects directory and branch name"); exit(1); } let projects_directory = &args[1]; + let dbt_branch = &args[2]; // to add a new metric to the test suite, simply define it in this list: let metrics: Vec = vec![Metric { @@ -54,7 +55,9 @@ fn main() { .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) .arg("--export-json") - .arg(["../../results/", &metric.outfile(project_name)].join("")) + .arg( + ["../../results/", &metric.outfile(project_name, &dbt_branch)].join(""), + ) // this prevents capture dbt output. 
Noisy, but good for debugging when tests fail. .arg("--show-output") .status() // use spawn() here instead for more information From 4c06689ff56006a2f661dd77898277c7a597b83a Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 16:54:57 -0400 Subject: [PATCH 13/81] create a tuple of measurements to group results by --- performance/comparison/src/main.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 7dbb3d91eb1..67f481f9669 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -55,7 +55,7 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let x: Result> = result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -66,13 +66,22 @@ fn main() { }) .map(|filename| { println!("{:?}", filename); - let contents = fs::read_to_string(filename).unwrap(); + let contents = fs::read_to_string(&filename).unwrap(); let measurements: Result = serde_json::from_str(&contents); + let results = measurements.map(|x| x.results[0].clone()); + let filepath = filename.into_os_string().into_string().unwrap(); + let parts: Vec<&str> = filepath.split("_").collect(); + // the way we're running these, the files will each contain exactly one measurement - measurements.map(|x| x.results[0].clone()) + results.map(|r| (parts[0].to_owned(), parts[1..].join(""), r.clone())) }) .collect(); + // TODO + // - group by project and metric + // - each group will have 2 measurements for each branch + // - perfrom regressions on each pair + // TODO exit(1) when Regression is present match x { Err(e) => panic!("{}", e), From c8bc25d11ae7a8d97b553cab7709cf9e337ffff3 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 17:05:08 -0400 Subject: [PATCH 14/81] add struct --- performance/comparison/src/main.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 67f481f9669..320585aff61 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -44,6 +44,12 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { } } +struct MeasurementGroup { + branch: String, + run: String, + measurement: Measurement, +} + fn main() { // TODO args lib let args: Vec = env::args().collect(); @@ -55,7 +61,7 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let x: Result> = result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -73,7 +79,11 @@ fn main() { let parts: Vec<&str> = filepath.split("_").collect(); // the way we're running these, the files will each contain exactly one measurement - results.map(|r| (parts[0].to_owned(), parts[1..].join(""), r.clone())) + results.map(|r| MeasurementGroup { + branch: parts[0].to_owned(), + run: parts[1..].join(""), + measurement: r.clone(), + }) }) .collect(); From 37b31d10c85b53dde991b54aa513278e6d8b991c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Tue, 27 Jul 2021 11:28:32 -0400 Subject: [PATCH 15/81] add derived traits to structs --- performance/comparison/src/main.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 320585aff61..d90f5aa31e1 100644 --- 
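Tying the last few commits together: with the branch argument added in "add new arg for runner for branch name", each result file name now encodes branch, metric, and project, and the comparison code recovers those pieces by splitting the name on underscores. A sketch with illustrative values, assuming the `Metric` type from `runner/src/main.rs` is in scope (for example inside a test in that file):

    let metric = Metric {
        name: "parse",
        prepare: "rm -rf target/",
        cmd: "dbt parse --no-version-check",
    };

    // [branch, "_", name, "_", project, ".json"].join("")
    assert_eq!(
        metric.outfile("01_mini_project_dont_merge", "dev"),
        "dev_parse_01_mini_project_dont_merge.json"
    );

    // splitting that name on '_' yields ["dev", "parse", "01", "mini", ...]:
    // parts[0] is the branch, and the remaining parts identify the metric and project,
    // which is how a dev measurement gets grouped with its counterpart from the other branch.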
a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -4,7 +4,7 @@ use std::path::Path; use std::process::exit; use std::{env, fs}; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct Measurement { command: String, mean: f64, @@ -17,11 +17,12 @@ struct Measurement { times: Vec, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct Measurements { results: Vec, } +#[derive(Debug, Clone, PartialEq)] struct Regression { threshold: f64, difference: f64, @@ -34,8 +35,8 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { let difference = latest.median / base.median; if difference > threshold { Some(Regression { - threshold: threshold, - difference: difference, + threshold, + difference, base: base.clone(), latest: latest.clone(), }) @@ -44,6 +45,7 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { } } +#[derive(Debug, Clone, PartialEq)] struct MeasurementGroup { branch: String, run: String, @@ -61,7 +63,7 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let measurements: Result> = result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -90,13 +92,15 @@ fn main() { // TODO // - group by project and metric // - each group will have 2 measurements for each branch - // - perfrom regressions on each pair + // - perform regressions on each pair + // dev_parse_02_mini_project_dont_merge.json + // dev_parse_01_mini_project_dont_merge.json // TODO exit(1) when Regression is present - match x { + match measurements { Err(e) => panic!("{}", e), Ok(_) => (), } - println!("{:?}", x); + println!("{:?}", measurements); } From 4ddba7e44c6b232e5348337fd4dc8b70742785cc Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Tue, 27 Jul 2021 12:44:06 -0400 Subject: [PATCH 16/81] --wip-- --- performance/comparison/src/main.rs | 116 +++++++++++++++++---------- performance/project_config/.user.yml | 1 + 2 files changed, 74 insertions(+), 43 deletions(-) create mode 100644 performance/project_config/.user.yml diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index d90f5aa31e1..e3335277f83 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -26,18 +26,25 @@ struct Measurements { struct Regression { threshold: f64, difference: f64, - base: Measurement, + baseline: Measurement, latest: Measurement, } -fn regression(base: &Measurement, latest: &Measurement) -> Option { +#[derive(Debug, Clone, PartialEq)] +struct MeasurementGroup { + version: String, + run: String, + measurement: Measurement, +} + +fn regression(baseline: &Measurement, latest: &Measurement) -> Option { let threshold = 1.05; // 5% regression threshold - let difference = latest.median / base.median; + let difference = latest.median / baseline.median; if difference > threshold { Some(Regression { threshold, difference, - base: base.clone(), + baseline: baseline.clone(), latest: latest.clone(), }) } else { @@ -45,25 +52,12 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { } } -#[derive(Debug, Clone, PartialEq)] -struct MeasurementGroup { - branch: String, - run: String, - measurement: Measurement, -} - -fn main() { - // TODO args lib - let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("please provide the results directory"); - exit(1); - } - let 
results_directory = &args[1]; +fn measurements_from_files( + results_directory: &Path, +) -> std::result::Result, std::io::Error> { + let result_files = fs::read_dir(results_directory)?; - let path = Path::new(&results_directory); - let result_files = fs::read_dir(path).unwrap(); - let measurements: Result> = result_files + result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -74,33 +68,69 @@ fn main() { }) .map(|filename| { println!("{:?}", filename); - let contents = fs::read_to_string(&filename).unwrap(); - let measurements: Result = serde_json::from_str(&contents); - let results = measurements.map(|x| x.results[0].clone()); - let filepath = filename.into_os_string().into_string().unwrap(); - let parts: Vec<&str> = filepath.split("_").collect(); - - // the way we're running these, the files will each contain exactly one measurement - results.map(|r| MeasurementGroup { - branch: parts[0].to_owned(), - run: parts[1..].join(""), - measurement: r.clone(), - }) + let contents = fs::read_to_string(&filename); + contents.map(|c| serde_json::from_str::(&c).unwrap()) //TODO rm unwrap! }) - .collect(); + .collect() +} + +fn detect_regressions(measurements: Vec) -> Vec { + panic!() +} + +fn main() { + // TODO args lib + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("please provide the results directory"); + exit(1); + } + let results_directory = &args[1]; + + let foo = measurements_from_files(Path::new(&results_directory)); + foo.map(|x| detect_regressions(x)); + + // let measurements: Result)>> = result_files + // .into_iter() + // .map(|f| f.unwrap().path()) + // .filter(|filename| { + // filename + // .extension() + // .and_then(|ext| ext.to_str()) + // .map_or(false, |ext| ext.ends_with("json")) + // }) + // .map(|filename| { + // println!("{:?}", filename); + // let contents = fs::read_to_string(&filename).unwrap(); + // let measurements: Result = serde_json::from_str(&contents); + // let results = measurements.map(|x| x.results[0].clone()); + // let filepath = filename.into_os_string().into_string().unwrap(); + // let parts: Vec<&str> = filepath.split("_").collect(); + + // // the way we're running these, the files will each contain exactly one measurement + // results.map(|r| MeasurementGroup { + // version: parts[0].to_owned(), + // run: parts[1..].join(""), + // measurement: r.clone(), + // }) + // }) + // .collect(); // TODO // - group by project and metric // - each group will have 2 measurements for each branch // - perform regressions on each pair - // dev_parse_02_mini_project_dont_merge.json - // dev_parse_01_mini_project_dont_merge.json + // latest_parse_02_mini_project_dont_merge.json + // latest_parse_01_mini_project_dont_merge.json + // baseline_parse_02_mini_project_dont_merge.json + // baseline_parse_01_mini_project_dont_merge.json + // command, project -> 2 version + // let x: Vec<(&str, Vec)>; // TODO exit(1) when Regression is present - match measurements { - Err(e) => panic!("{}", e), - Ok(_) => (), - } - - println!("{:?}", measurements); + // match foo { + // Err(&e) => panic!("{}", &e), + // Ok(_) => (), + // } + foo.clone().map(|f| println!("{:?}", f)).unwrap(); } diff --git a/performance/project_config/.user.yml b/performance/project_config/.user.yml new file mode 100644 index 00000000000..1870bd1ea47 --- /dev/null +++ b/performance/project_config/.user.yml @@ -0,0 +1 @@ +id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b From 45bb955b55fc37f965ceb7b8299fe719de4f93f6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 28 Jul 
2021 15:54:04 -0400 Subject: [PATCH 17/81] still wip but compiles --- performance/comparison/src/main.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index e3335277f83..6492b8483df 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -87,8 +87,8 @@ fn main() { } let results_directory = &args[1]; - let foo = measurements_from_files(Path::new(&results_directory)); - foo.map(|x| detect_regressions(x)); + let measurements = measurements_from_files(Path::new(&results_directory)); + let regressions = measurements.map(|x| detect_regressions(x)); // let measurements: Result)>> = result_files // .into_iter() @@ -132,5 +132,10 @@ fn main() { // Err(&e) => panic!("{}", &e), // Ok(_) => (), // } - foo.clone().map(|f| println!("{:?}", f)).unwrap(); + + // print the regressions or throw + match regressions { + Err(e) => panic!("{}", e), + Ok(rs) => println!("{:?}", rs) + } } From d5662ef34c2784a0adab45b966da1725094a1b23 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 16:33:43 -0400 Subject: [PATCH 18/81] wrap up comparsion logic --- performance/comparison/Cargo.lock | 37 ++++++++ performance/comparison/Cargo.toml | 2 + performance/comparison/src/main.rs | 147 ++++++++++++++++++----------- 3 files changed, 130 insertions(+), 56 deletions(-) diff --git a/performance/comparison/Cargo.lock b/performance/comparison/Cargo.lock index 4bf9976c9a0..76e80198abc 100644 --- a/performance/comparison/Cargo.lock +++ b/performance/comparison/Cargo.lock @@ -6,8 +6,25 @@ version = 3 name = "comparison" version = "0.1.0" dependencies = [ + "itertools", "serde", "serde_json", + "thiserror", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", ] [[package]] @@ -82,6 +99,26 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "thiserror" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-xid" version = "0.2.2" diff --git a/performance/comparison/Cargo.toml b/performance/comparison/Cargo.toml index df15857b46e..59ee1954fbd 100644 --- a/performance/comparison/Cargo.toml +++ b/performance/comparison/Cargo.toml @@ -4,5 +4,7 @@ version = "0.1.0" edition = "2018" [dependencies] +itertools = "0.10.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +thiserror = "1.0.26" diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 6492b8483df..c3ed7e607fc 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -1,8 +1,12 @@ +use itertools::Itertools; use serde::{Deserialize, Serialize}; -use serde_json::Result; -use std::path::Path; +use std::path::{ + Path, + PathBuf +}; 
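One detail worth calling out before the new comparison logic below: `Itertools::group_by` only merges consecutive elements with equal keys, which is why the measurement groups are sorted before grouping. A standalone illustration of that behavior, assuming itertools 0.10 as pinned in the Cargo.toml above; the tuples are made-up stand-ins for real measurement groups:

    use itertools::Itertools;

    fn main() {
        // two results for the "parse01" run (baseline and latest) plus one stray "parse02" result
        let mut files = vec![("parse01", "latest"), ("parse02", "latest"), ("parse01", "baseline")];
        files.sort(); // group_by only merges *adjacent* equal keys, so sort first

        let group_sizes: Vec<(&str, usize)> = files
            .into_iter()
            .group_by(|(run, _)| *run)
            .into_iter()
            .map(|(run, group)| (run, group.count()))
            .collect();

        assert_eq!(group_sizes, vec![("parse01", 2), ("parse02", 1)]);
    }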
use std::process::exit; -use std::{env, fs}; +use std::{env, fs, io}; +use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct Measurement { @@ -37,6 +41,20 @@ struct MeasurementGroup { measurement: Measurement, } +#[derive(Error, Debug)] +enum TestError { + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] + BadJSONErr(PathBuf, serde_json::Error), + #[error("ReadErr: The file cannot be read. \nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] + ReadErr(PathBuf, io::Error), + #[error("MissingFilenameErr: The path provided does not specify a file. \nFilepath: {}\n", .0.to_string_lossy().into_owned())] + MissingFilenameErr(PathBuf), + #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + FilenameNotUnicodeErr(PathBuf), + #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + BadFileContentsErr(PathBuf, io::Error), +} + fn regression(baseline: &Measurement, latest: &Measurement) -> Option { let threshold = 1.05; // 5% regression threshold let difference = latest.median / baseline.median; @@ -52,10 +70,13 @@ fn regression(baseline: &Measurement, latest: &Measurement) -> Option std::result::Result, std::io::Error> { - let result_files = fs::read_dir(results_directory)?; +) -> Result, TestError> { + let result_files = fs::read_dir(results_directory) + .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), e)))?; result_files .into_iter() @@ -68,14 +89,55 @@ fn measurements_from_files( }) .map(|filename| { println!("{:?}", filename); - let contents = fs::read_to_string(&filename); - contents.map(|c| serde_json::from_str::(&c).unwrap()) //TODO rm unwrap! 
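The `TestError` enum above leans on `thiserror`: the `#[error("...")]` attribute on each variant generates the `Display` implementation that the eventual `panic!("{}", e)` prints. A minimal standalone sketch of that mechanism, using a hypothetical `DemoError` type and assuming thiserror 1.0 as pinned above:

    use std::path::PathBuf;
    use thiserror::Error;

    #[derive(Error, Debug)]
    enum DemoError {
        #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())]
        MissingFilenameErr(PathBuf),
    }

    fn main() {
        let err = DemoError::MissingFilenameErr(PathBuf::from("performance/results"));
        // Display is derived from the #[error(...)] attribute
        println!("{}", err);
    }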
+ fs::read_to_string(&filename) + .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), e))) + .and_then(|contents| + serde_json::from_str::(&contents) + .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), e))) + ) + .map(|m| (filename, m)) }) .collect() } -fn detect_regressions(measurements: Vec) -> Vec { - panic!() +// given a list of filename-measurement pairs, detect any regressions by grouping +// measurements together by filename +fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result, TestError> { + let mut measurement_groups: Vec = measurements + .into_iter() + .map(|(p, m)| { + p.file_name() + .ok_or_else(|| TestError::MissingFilenameErr(p.to_path_buf())) + .and_then(|name| name.to_str().ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf()))) + .map(|name| { + let parts: Vec<&str> = name.split("_").collect(); + MeasurementGroup { + version: parts[0].to_owned(), + run: parts[1..].join(""), + measurement: m.clone(), + } + }) + }) + .collect::, TestError>>()?; + + measurement_groups + .sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); + + // locking up mutation + let sorted_measurement_groups = measurement_groups; + + let x: Vec = sorted_measurement_groups + .into_iter() + .group_by(|x| (x.run.clone(), x.version.clone())) + .into_iter() + .map(|(_, group)| { + let g: Vec = group.collect(); + regression(&g[0].measurement, &g[1].measurement) + }) + .flatten() + .collect(); + + Ok(x) } fn main() { @@ -87,55 +149,28 @@ fn main() { } let results_directory = &args[1]; - let measurements = measurements_from_files(Path::new(&results_directory)); - let regressions = measurements.map(|x| detect_regressions(x)); - - // let measurements: Result)>> = result_files - // .into_iter() - // .map(|f| f.unwrap().path()) - // .filter(|filename| { - // filename - // .extension() - // .and_then(|ext| ext.to_str()) - // .map_or(false, |ext| ext.ends_with("json")) - // }) - // .map(|filename| { - // println!("{:?}", filename); - // let contents = fs::read_to_string(&filename).unwrap(); - // let measurements: Result = serde_json::from_str(&contents); - // let results = measurements.map(|x| x.results[0].clone()); - // let filepath = filename.into_os_string().into_string().unwrap(); - // let parts: Vec<&str> = filepath.split("_").collect(); - - // // the way we're running these, the files will each contain exactly one measurement - // results.map(|r| MeasurementGroup { - // version: parts[0].to_owned(), - // run: parts[1..].join(""), - // measurement: r.clone(), - // }) - // }) - // .collect(); - - // TODO - // - group by project and metric - // - each group will have 2 measurements for each branch - // - perform regressions on each pair - // latest_parse_02_mini_project_dont_merge.json - // latest_parse_01_mini_project_dont_merge.json - // baseline_parse_02_mini_project_dont_merge.json - // baseline_parse_01_mini_project_dont_merge.json - // command, project -> 2 version - // let x: Vec<(&str, Vec)>; - - // TODO exit(1) when Regression is present - // match foo { - // Err(&e) => panic!("{}", &e), - // Ok(_) => (), - // } + let regressions = measurements_from_files(Path::new(&results_directory)) + .and_then(|v| { + let v_next: Vec<(PathBuf, Measurement)> = v.into_iter() + // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` + .map(|(p, ms)| (p, ms.results[0].clone())) + .collect(); + + detect_regressions(&v_next) + }); - // print the regressions or throw match regressions { Err(e) => panic!("{}", e), - 
Ok(rs) => println!("{:?}", rs) + Ok(rs) => match rs[..] { + [] => println!("congrats! no regressions"), + _ => { + for r in rs { + println!("{:?}", r); + } + println!("the above regressions were found."); + exit(1) + }, + }, + } } From 80951ae973a514aae14fedc6b6ec61995b1ece22 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 16:34:47 -0400 Subject: [PATCH 19/81] fmt --- performance/comparison/src/main.rs | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index c3ed7e607fc..5d7be748429 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -1,9 +1,6 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::path::{ - Path, - PathBuf -}; +use std::path::{Path, PathBuf}; use std::process::exit; use std::{env, fs, io}; use thiserror::Error; @@ -91,10 +88,10 @@ fn measurements_from_files( println!("{:?}", filename); fs::read_to_string(&filename) .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), e))) - .and_then(|contents| + .and_then(|contents| { serde_json::from_str::(&contents) .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), e))) - ) + }) .map(|m| (filename, m)) }) .collect() @@ -102,14 +99,19 @@ fn measurements_from_files( // given a list of filename-measurement pairs, detect any regressions by grouping // measurements together by filename -fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result, TestError> { +fn detect_regressions( + measurements: &[(PathBuf, Measurement)], +) -> Result, TestError> { let mut measurement_groups: Vec = measurements .into_iter() .map(|(p, m)| { p.file_name() .ok_or_else(|| TestError::MissingFilenameErr(p.to_path_buf())) - .and_then(|name| name.to_str().ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf()))) - .map(|name| { + .and_then(|name| { + name.to_str() + .ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf())) + }) + .map(|name| { let parts: Vec<&str> = name.split("_").collect(); MeasurementGroup { version: parts[0].to_owned(), @@ -119,13 +121,12 @@ fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result, TestError>>()?; - - measurement_groups - .sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); + + measurement_groups.sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); // locking up mutation let sorted_measurement_groups = measurement_groups; - + let x: Vec = sorted_measurement_groups .into_iter() .group_by(|x| (x.run.clone(), x.version.clone())) @@ -136,7 +137,7 @@ fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result = v.into_iter() - // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` - .map(|(p, ms)| (p, ms.results[0].clone())) - .collect(); - - detect_regressions(&v_next) - }); + let regressions = measurements_from_files(Path::new(&results_directory)).and_then(|v| { + let v_next: Vec<(PathBuf, Measurement)> = v + .into_iter() + // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` + .map(|(p, ms)| (p, ms.results[0].clone())) + .collect(); + + detect_regressions(&v_next) + }); match regressions { Err(e) => panic!("{}", e), @@ -169,8 +170,7 @@ fn main() { } println!("the above regressions were found."); exit(1) - }, + } }, - } } From 5466d474c5a36bd5f50b0b8e8fdd8a902a9983ed Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 
17:07:41 -0400 Subject: [PATCH 20/81] add test for exception display --- performance/comparison/src/main.rs | 64 +++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 5d7be748429..762fe5056dc 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -40,16 +40,16 @@ struct MeasurementGroup { #[derive(Error, Debug)] enum TestError { - #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] - BadJSONErr(PathBuf, serde_json::Error), - #[error("ReadErr: The file cannot be read. \nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] - ReadErr(PathBuf, io::Error), - #[error("MissingFilenameErr: The path provided does not specify a file. \nFilepath: {}\n", .0.to_string_lossy().into_owned())] + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadJSONErr(PathBuf, Option), + #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + ReadErr(PathBuf, Option), + #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] MissingFilenameErr(PathBuf), #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] FilenameNotUnicodeErr(PathBuf), - #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] - BadFileContentsErr(PathBuf, io::Error), + #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadFileContentsErr(PathBuf, Option), } fn regression(baseline: &Measurement, latest: &Measurement) -> Option { @@ -73,7 +73,7 @@ fn measurements_from_files( results_directory: &Path, ) -> Result, TestError> { let result_files = fs::read_dir(results_directory) - .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), e)))?; + .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), Some(e))))?; result_files .into_iter() @@ -87,10 +87,10 @@ fn measurements_from_files( .map(|filename| { println!("{:?}", filename); fs::read_to_string(&filename) - .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), e))) + .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), Some(e)))) .and_then(|contents| { serde_json::from_str::(&contents) - .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), e))) + .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), Some(e)))) }) .map(|m| (filename, m)) }) @@ -174,3 +174,47 @@ fn main() { }, } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_messages() { + let pairs = vec![ + ( + TestError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"BadJSONErr: JSON in file cannot be deserialized as expected. 
+Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + TestError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"ReadErr: The file cannot be read. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + TestError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"MissingFilenameErr: The path provided does not specify a file. +Filepath: dummy/path/no_file/"# + ), + ( + TestError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. +Filepath: dummy/path/no_file/"# + ), + ( + TestError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), + r#"BadFileContentsErr: Check that the file exists and is readable. +Filepath: dummy/path/filenotexist.json +Originating Exception:None"# + ), + ]; + + for (err, msg) in pairs { + assert_eq!(format!("{}", err), msg) + } + } + +} From fa78102eaf4b1512c2abc46375a59931985c3921 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:23:30 -0400 Subject: [PATCH 21/81] add comparison to workflow --- .github/workflows/performance.yml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 478f12eda63..f948afa8427 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -127,14 +127,29 @@ jobs: name: Compare Results runs-on: ubuntu-latest steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: "test rust comparator" + uses: actions-rs/cargo@v1 + with: + command: test + args: --release --manifest-path performance/comparison/Cargo.toml + - name: "build runnable rust comparator" + uses: actions-rs/cargo@v1 + with: + command: build + args: --release --manifest-path performance/comparison/Cargo.toml - uses: actions/download-artifact@v2 with: name: dev-results - - name: ls result files - run: ls - uses: actions/download-artifact@v2 with: name: latest-results - name: ls result files run: ls - # TODO actually compare + - name: "run comparator" + run: ./performance/comparison/target/release/comparison performance/results/ From 8a5e9b71a57d651889ebd0d2d898aa1f2360e5ca Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:50:33 -0400 Subject: [PATCH 22/81] add some output to comparitor --- performance/comparison/src/main.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 762fe5056dc..864ff086ce4 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::process::exit; use std::{env, fs, io}; +use std::borrow::Cow; use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -157,6 +158,11 @@ fn main() { .map(|(p, ms)| (p, ms.results[0].clone())) .collect(); + println!("checking regressions with the following measurements:"); + for (path, _) in &v_next { + println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) + } + detect_regressions(&v_next) }); From 429396aa0293fdd58acbadb1a4acecb1d19df32f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:55:26 -0400 Subject: [PATCH 23/81] move step dependencies around 
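The only contract between the measurement jobs and this comparison step is the results file naming scheme: the runner writes one JSON file per branch/metric/project named <branch>_<metric>_<project>.json, and the comparator recovers its grouping key by splitting the filename on underscores. A simplified sketch of that key derivation (not the literal implementation; the file names are just examples):

    // splits "<branch>_<metric>_<project>.json" into (branch, grouping key)
    fn split_result_filename(name: &str) -> (String, String) {
        let parts: Vec<&str> = name.split('_').collect();
        (parts[0].to_owned(), parts[1..].join("_"))
    }

    fn main() {
        for name in vec![
            "baseline_parse_01_mini_project_dont_merge.json",
            "latest_parse_01_mini_project_dont_merge.json",
        ] {
            let (branch, run) = split_result_filename(name);
            println!("branch: {:10} run: {}", branch, run);
        }
        // both files share the same run key, so they form one baseline-vs-latest pair
    }
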
--- .github/workflows/performance.yml | 70 +++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index f948afa8427..1fffbbe42a7 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -17,7 +17,7 @@ jobs: # runs any tests associated with the runner # these tests make sure the runner logic is correct # if there are no tests, it still quickly builds the project to fail fast - test-rust: + test-runner: name: Test Runner runs-on: ubuntu-latest steps: @@ -32,9 +32,27 @@ jobs: command: test args: --manifest-path performance/runner/Cargo.toml + # runs any tests associated with the comparator + # these tests make sure the comparison logic is correct + # if there are no tests, it still quickly builds the project to fail fast + test-comparator: + name: Test Comparator + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + args: --manifest-path performance/comparison/Cargo.toml + # build an optimized binary to be used as the runner in later steps - build-rust: - needs: [test-rust] + build-runner: + needs: [test-runner] name: Build Runner runs-on: ubuntu-latest env: @@ -55,9 +73,32 @@ jobs: name: runner path: performance/runner/target/release/runner + # build an optimized binary to be used as the comparator in later steps + build-comparator: + needs: [test-comparator] + name: Build Runner + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-D warnings" + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: build + args: --release --manifest-path performance/comparison/Cargo.toml + - uses: actions/upload-artifact@v2 + with: + name: comparison + path: performance/comparison/target/release/comparison + # run the performance measurements on the current or default branch measure-dev: - needs: [build-rust] + needs: [build-runner, build-comparator] name: Measure Dev Branch runs-on: ubuntu-latest steps: @@ -85,7 +126,7 @@ jobs: # run the performance measurements on the latest release branch measure-latest: - needs: [build-rust] + needs: [build-runner, build-comparator] name: Measure Latest Branch runs-on: ubuntu-latest steps: @@ -133,16 +174,6 @@ jobs: profile: minimal toolchain: stable override: true - - name: "test rust comparator" - uses: actions-rs/cargo@v1 - with: - command: test - args: --release --manifest-path performance/comparison/Cargo.toml - - name: "build runnable rust comparator" - uses: actions-rs/cargo@v1 - with: - command: build - args: --release --manifest-path performance/comparison/Cargo.toml - uses: actions/download-artifact@v2 with: name: dev-results @@ -151,5 +182,10 @@ jobs: name: latest-results - name: ls result files run: ls - - name: "run comparator" - run: ./performance/comparison/target/release/comparison performance/results/ + - uses: actions/download-artifact@v2 + with: + name: comparator + - name: change permissions + run: chmod +x ./comparison + - name: run comparator + run: ./comparison performance/results/ From f4775d76733c0287fe24efe2278d02e235b43e8c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:56:29 -0400 Subject: [PATCH 24/81] change job name --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 1fffbbe42a7..1ae92bf729e 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -76,7 +76,7 @@ jobs: # build an optimized binary to be used as the comparator in later steps build-comparator: needs: [test-comparator] - name: Build Runner + name: Build Comparator runs-on: ubuntu-latest env: RUSTFLAGS: "-D warnings" From 6ba837d73ddf0d9a73c337a36d000a35cbc588b2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 18:03:04 -0400 Subject: [PATCH 25/81] fix download artifact name --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 1ae92bf729e..ed3facd0cc6 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -184,7 +184,7 @@ jobs: run: ls - uses: actions/download-artifact@v2 with: - name: comparator + name: comparison - name: change permissions run: chmod +x ./comparison - name: run comparator From 3d4a82cca2087ea686ab3037e4dd50e2cf094079 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 18:09:31 -0400 Subject: [PATCH 26/81] add comments --- .github/workflows/performance.yml | 4 ++++ performance/comparison/src/main.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index ed3facd0cc6..72e16951eff 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -125,6 +125,8 @@ jobs: path: performance/results/ # run the performance measurements on the latest release branch + # this part takes by far the longest, so we do everything we can first + # so the job fails fast. measure-latest: needs: [build-runner, build-comparator] name: Measure Latest Branch @@ -163,6 +165,8 @@ jobs: name: latest-results path: performance/results/ + # run the comparison app on the result output generated from measuring + # the two branches. Exits with non-zero code if a regression is detected. compare-results: needs: [measure-dev, measure-latest] name: Compare Results diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 864ff086ce4..63bf21b2ed8 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -142,6 +142,7 @@ fn detect_regressions( Ok(x) } +// TODO merge runner and comparison projects into one with two sub commands fn main() { // TODO args lib let args: Vec = env::args().collect(); @@ -160,6 +161,7 @@ fn main() { println!("checking regressions with the following measurements:"); for (path, _) in &v_next { + // TODO this printed nothing. What's the size of this vector? 
println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) } From 2f7ab2d038210088ff14fd7b2e2db4821f14b638 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:04:45 -0400 Subject: [PATCH 27/81] unify rust apps as subcommands --- .github/workflows/performance.yml | 58 +--- performance/comparison/Cargo.lock | 126 -------- performance/comparison/Cargo.toml | 10 - performance/runner/Cargo.lock | 300 ++++++++++++++++++ performance/runner/Cargo.toml | 7 +- .../src/main.rs => runner/src/calculate.rs} | 44 +-- performance/runner/src/main.rs | 114 ++----- performance/runner/src/measure.rs | 97 ++++++ 8 files changed, 459 insertions(+), 297 deletions(-) delete mode 100644 performance/comparison/Cargo.lock delete mode 100644 performance/comparison/Cargo.toml rename performance/{comparison/src/main.rs => runner/src/calculate.rs} (89%) create mode 100644 performance/runner/src/measure.rs diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 72e16951eff..7267503bd24 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -16,7 +16,6 @@ jobs: # runs any tests associated with the runner # these tests make sure the runner logic is correct - # if there are no tests, it still quickly builds the project to fail fast test-runner: name: Test Runner runs-on: ubuntu-latest @@ -32,24 +31,6 @@ jobs: command: test args: --manifest-path performance/runner/Cargo.toml - # runs any tests associated with the comparator - # these tests make sure the comparison logic is correct - # if there are no tests, it still quickly builds the project to fail fast - test-comparator: - name: Test Comparator - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path performance/comparison/Cargo.toml - # build an optimized binary to be used as the runner in later steps build-runner: needs: [test-runner] @@ -73,32 +54,9 @@ jobs: name: runner path: performance/runner/target/release/runner - # build an optimized binary to be used as the comparator in later steps - build-comparator: - needs: [test-comparator] - name: Build Comparator - runs-on: ubuntu-latest - env: - RUSTFLAGS: "-D warnings" - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: build - args: --release --manifest-path performance/comparison/Cargo.toml - - uses: actions/upload-artifact@v2 - with: - name: comparison - path: performance/comparison/target/release/comparison - # run the performance measurements on the current or default branch measure-dev: - needs: [build-runner, build-comparator] + needs: [build-runner] name: Measure Dev Branch runs-on: ubuntu-latest steps: @@ -118,7 +76,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run - run: ./runner ${{ github.workspace }}/performance/projects/ dev + run: ./runner -p ${{ github.workspace }}/performance/projects/ -b dev - uses: actions/upload-artifact@v2 with: name: dev-results @@ -128,7 +86,7 @@ jobs: # this part takes by far the longest, so we do everything we can first # so the job fails fast. 
measure-latest: - needs: [build-runner, build-comparator] + needs: [build-runner] name: Measure Latest Branch runs-on: ubuntu-latest steps: @@ -159,7 +117,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner ${{ github.workspace }}/performance/projects/ latest + run: ./runner -p ${{ github.workspace }}/performance/projects/ -b latest - uses: actions/upload-artifact@v2 with: name: latest-results @@ -167,7 +125,7 @@ jobs: # run the comparison app on the result output generated from measuring # the two branches. Exits with non-zero code if a regression is detected. - compare-results: + calculate-regressions: needs: [measure-dev, measure-latest] name: Compare Results runs-on: ubuntu-latest @@ -190,6 +148,6 @@ jobs: with: name: comparison - name: change permissions - run: chmod +x ./comparison - - name: run comparator - run: ./comparison performance/results/ + run: chmod +x ./runner + - name: run calculation + run: ./runner calculate -r performance/results/ diff --git a/performance/comparison/Cargo.lock b/performance/comparison/Cargo.lock deleted file mode 100644 index 76e80198abc..00000000000 --- a/performance/comparison/Cargo.lock +++ /dev/null @@ -1,126 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "comparison" -version = "0.1.0" -dependencies = [ - "itertools", - "serde", - "serde_json", - "thiserror", -] - -[[package]] -name = "either" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" - -[[package]] -name = "itertools" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" - -[[package]] -name = "proc-macro2" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "ryu" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" - -[[package]] -name = "serde" -version = "1.0.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "syn" -version = "1.0.74" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "thiserror" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" diff --git a/performance/comparison/Cargo.toml b/performance/comparison/Cargo.toml deleted file mode 100644 index 59ee1954fbd..00000000000 --- a/performance/comparison/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "comparison" -version = "0.1.0" -edition = "2018" - -[dependencies] -itertools = "0.10.1" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "1.0.26" diff --git a/performance/runner/Cargo.lock b/performance/runner/Cargo.lock index 0832b7312b5..8b90cedfef8 100644 --- a/performance/runner/Cargo.lock +++ b/performance/runner/Cargo.lock @@ -2,6 +2,306 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + [[package]] name = "runner" version = "0.1.0" +dependencies = [ + "itertools", + "serde", + "serde_json", + "structopt", + "thiserror", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.127" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.127" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/performance/runner/Cargo.toml b/performance/runner/Cargo.toml index 77ff9695b77..a1581255689 100644 --- a/performance/runner/Cargo.toml +++ b/performance/runner/Cargo.toml @@ -3,6 +3,9 @@ name = "runner" version = "0.1.0" edition = "2018" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] +itertools = "0.10.1" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +structopt = "0.3" +thiserror = "1.0.26" diff --git a/performance/comparison/src/main.rs b/performance/runner/src/calculate.rs similarity index 89% rename from performance/comparison/src/main.rs rename to performance/runner/src/calculate.rs index 63bf21b2ed8..c2e0d592b4b 100644 --- a/performance/comparison/src/main.rs +++ b/performance/runner/src/calculate.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; 
use std::process::exit; -use std::{env, fs, io}; +use std::{fs, io}; use std::borrow::Cow; use thiserror::Error; @@ -25,7 +25,7 @@ struct Measurements { } #[derive(Debug, Clone, PartialEq)] -struct Regression { +pub struct Regression { threshold: f64, difference: f64, baseline: Measurement, @@ -40,7 +40,7 @@ struct MeasurementGroup { } #[derive(Error, Debug)] -enum TestError { +pub enum TestError { #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadJSONErr(PathBuf, Option), #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] @@ -100,7 +100,7 @@ fn measurements_from_files( // given a list of filename-measurement pairs, detect any regressions by grouping // measurements together by filename -fn detect_regressions( +fn calculate_regressions( measurements: &[(PathBuf, Measurement)], ) -> Result, TestError> { let mut measurement_groups: Vec = measurements @@ -142,17 +142,8 @@ fn detect_regressions( Ok(x) } -// TODO merge runner and comparison projects into one with two sub commands -fn main() { - // TODO args lib - let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("please provide the results directory"); - exit(1); - } - let results_directory = &args[1]; - - let regressions = measurements_from_files(Path::new(&results_directory)).and_then(|v| { +pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { + measurements_from_files(Path::new(&results_directory)).and_then(|v| { let v_next: Vec<(PathBuf, Measurement)> = v .into_iter() // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` @@ -165,21 +156,20 @@ fn main() { println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) } - detect_regressions(&v_next) - }); + calculate_regressions(&v_next) + }) +} +pub fn exit_properly(regressions: &[Regression]) { match regressions { - Err(e) => panic!("{}", e), - Ok(rs) => match rs[..] { - [] => println!("congrats! no regressions"), - _ => { - for r in rs { - println!("{:?}", r); - } - println!("the above regressions were found."); - exit(1) + [] => println!("congrats! 
no regressions"), + _ => { + for r in regressions { + println!("{:?}", r); } - }, + println!("the above regressions were found."); + exit(1) + } } } diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 86c8e01cdd1..d38d823870c 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,87 +1,37 @@ -use std::path::Path; -use std::process::{exit, Command, ExitStatus}; -use std::{env, fs, io}; - -#[derive(Debug, Clone)] -struct Metric<'a> { - name: &'a str, - prepare: &'a str, - cmd: &'a str, -} - -impl Metric<'_> { - fn outfile(&self, project: &str, branch: &str) -> String { - [branch, "_", self.name, "_", project, ".json"].join("") - } +extern crate structopt; + +mod calculate; +mod measure; + +use std::path::PathBuf; +use structopt::StructOpt; + +#[derive(Clone, Debug, StructOpt)] +#[structopt(name = "performance", about = "performance regression testing runner")] +enum Opt { + #[structopt(name = "measure")] + Measure { + #[structopt(parse(from_os_str))] + #[structopt(short)] + projects_dir: PathBuf, + #[structopt(short)] + branch_name: String, + }, + #[structopt(name = "calculate")] + Calculate { + #[structopt(parse(from_os_str))] + #[structopt(short)] + results_dir: PathBuf, + }, } fn main() { - // TODO args lib - let args: Vec = env::args().collect(); - if args.len() < 3 { - println!("please provide the target projects directory and branch name"); - exit(1); - } - let projects_directory = &args[1]; - let dbt_branch = &args[2]; - - // to add a new metric to the test suite, simply define it in this list: - let metrics: Vec = vec![Metric { - name: "parse", - prepare: "rm -rf target/", - cmd: "dbt parse --no-version-check", - }]; - - // list out all projects - let path = Path::new(&projects_directory); - let project_entries = fs::read_dir(path).unwrap(); - - let results: Result, io::Error> = project_entries - .map(|entry| { - metrics - .clone() - .into_iter() - .map(|metric| { - let path = entry.as_ref().unwrap().path(); - let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); - - Command::new("hyperfine") - .current_dir(&path) - // warms filesystem caches by running the command first without counting it. - // alternatively we could clear them before each run - .arg("--warmup") - .arg("1") - .arg("--prepare") - .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) - .arg("--export-json") - .arg( - ["../../results/", &metric.outfile(project_name, &dbt_branch)].join(""), - ) - // this prevents capture dbt output. Noisy, but good for debugging when tests fail. - .arg("--show-output") - .status() // use spawn() here instead for more information - }) - .collect::>>() - }) - .flatten() - .collect(); - - // only exit with status code 0 if everything ran as expected - match results { - // if dispatch of any of the commands failed, panic with that error - Err(e) => panic!("{:?}", e), - Ok(statuses) => { - for status in statuses { - match status.code() { - None => (), - Some(0) => (), - // if any child command exited with a non zero status code, exit with the same one here. 
- Some(nonzero) => exit(nonzero), - } - } - // everything ran as expected - exit(0); - } + match Opt::from_args() { + Opt::Measure { projects_dir, branch_name } => { + measure::proper_exit(measure::measure(&projects_dir, &branch_name).unwrap()) + }, + Opt::Calculate { results_dir } => { + calculate::exit_properly(&calculate::regressions(&results_dir).unwrap()) + }, } } diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs new file mode 100644 index 00000000000..4bf8cf20f91 --- /dev/null +++ b/performance/runner/src/measure.rs @@ -0,0 +1,97 @@ +extern crate structopt; + +use std::path::PathBuf; +use std::process::{exit, Command, ExitStatus}; +use std::{fs, io}; +use structopt::StructOpt; + +#[derive(Clone, Debug, StructOpt)] +#[structopt(name = "performance", about = "performance regression testing runner")] +enum Performance { + #[structopt(name = "measure")] + Measure { + #[structopt(parse(from_os_str))] + #[structopt(short)] + projects_dir: PathBuf, + #[structopt(short)] + branch_name: bool, + }, + #[structopt(name = "compare")] + Compare { + #[structopt(parse(from_os_str))] + #[structopt(short)] + results_dir: PathBuf, + }, +} + +#[derive(Debug, Clone)] +struct Metric<'a> { + name: &'a str, + prepare: &'a str, + cmd: &'a str, +} + +impl Metric<'_> { + fn outfile(&self, project: &str, branch: &str) -> String { + [branch, "_", self.name, "_", project, ".json"].join("") + } +} + +// calls hyperfine via system command, and returns result of runs +pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, io::Error> { + // to add a new metric to the test suite, simply define it in this list: + // TODO read from some config file? + let metrics: Vec = vec![ + Metric { + name: "parse", + prepare: "rm -rf target/", + cmd: "dbt parse --no-version-check", + }, + ]; + + // list out all projects + let project_entries = fs::read_dir(projects_directory).unwrap(); + + project_entries + .map(|entry| { + metrics + .clone() + .into_iter() + .map(|metric| { + let path = entry.as_ref().unwrap().path(); + let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); + + Command::new("hyperfine") + .current_dir(&path) + // warms filesystem caches by running the command first without counting it. + // alternatively we could clear them before each run + .arg("--warmup") + .arg("1") + .arg("--prepare") + .arg(metric.prepare) + .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) + .arg("--export-json") + .arg( + ["../../results/", &metric.outfile(project_name, &dbt_branch)].join(""), + ) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. + .arg("--show-output") + .status() // use spawn() here instead for more information + }) + .collect::>>() + }) + .flatten() + .collect() +} + +// exits the program given measurement results +pub fn proper_exit(statuses: Vec) { + for status in statuses { + match status.code() { + None => (), + Some(0) => (), + // if any child command exited with a non zero status code, exit with the same one here. 
+ Some(nonzero) => exit(nonzero), + } + } +} From 868fd64adf878d4bd06de4b07391b025f5470d01 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:08:00 -0400 Subject: [PATCH 28/81] download correct artifact --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 7267503bd24..2ed046c2290 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -146,7 +146,7 @@ jobs: run: ls - uses: actions/download-artifact@v2 with: - name: comparison + name: runner - name: change permissions run: chmod +x ./runner - name: run calculation From 95116dbb5b641faed1faba70821e784e9869cb57 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:12:25 -0400 Subject: [PATCH 29/81] fix measure call --- .github/workflows/performance.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 2ed046c2290..8d5bc864b3d 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -76,7 +76,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run - run: ./runner -p ${{ github.workspace }}/performance/projects/ -b dev + run: ./runner measure -b dev -p ${{ github.workspace }}/performance/projects/ - uses: actions/upload-artifact@v2 with: name: dev-results @@ -117,13 +117,13 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner -p ${{ github.workspace }}/performance/projects/ -b latest + run: ./runner measure -b latest -p ${{ github.workspace }}/performance/projects/ - uses: actions/upload-artifact@v2 with: name: latest-results path: performance/results/ - # run the comparison app on the result output generated from measuring + # detect regressions on the output generated from measuring # the two branches. Exits with non-zero code if a regression is detected. calculate-regressions: needs: [measure-dev, measure-latest] From f1f99a23717184ee9617fdd9edd467f97668674a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:15:44 -0400 Subject: [PATCH 30/81] add name to action --- .github/workflows/performance.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 8d5bc864b3d..cb994c7f53d 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -1,4 +1,5 @@ +name: Performance Regression Testing # Schedule triggers on: # TODO this is just while developing From 03332b2955e1eafd1486ed2bdc5b83b646fe4f8d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:32:56 -0400 Subject: [PATCH 31/81] more gracefully exit than unwrapping --- performance/runner/src/calculate.rs | 4 ++-- performance/runner/src/main.rs | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index c2e0d592b4b..2ac693652e3 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -160,8 +160,8 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE }) } -pub fn exit_properly(regressions: &[Regression]) { - match regressions { +pub fn exit_properly(regressions: Vec) { + match regressions[..] { [] => println!("congrats! 
no regressions"), _ => { for r in regressions { diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index d38d823870c..bd4eff6f832 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -3,7 +3,9 @@ extern crate structopt; mod calculate; mod measure; +use std::fmt::Display; use std::path::PathBuf; +use std::process::exit; use structopt::StructOpt; #[derive(Clone, Debug, StructOpt)] @@ -25,13 +27,29 @@ enum Opt { }, } +fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> B { + match r { + Err(e) => { + println!("{}", e); + exit(1) + }, + Ok(x) => f(x), + } +} + fn main() { match Opt::from_args() { Opt::Measure { projects_dir, branch_name } => { - measure::proper_exit(measure::measure(&projects_dir, &branch_name).unwrap()) + gracefully_exit_or( + &measure::proper_exit, + measure::measure(&projects_dir, &branch_name) + ) }, Opt::Calculate { results_dir } => { - calculate::exit_properly(&calculate::regressions(&results_dir).unwrap()) + gracefully_exit_or( + &calculate::exit_properly, + calculate::regressions(&results_dir) + ) }, } } From b02875a12b01fb91fcea46f3276200196c8dc817 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:35:32 -0400 Subject: [PATCH 32/81] add debug line --- performance/runner/src/calculate.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 2ac693652e3..003e173d790 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -151,6 +151,7 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE .collect(); println!("checking regressions with the following measurements:"); + println!("length: {}", &v_next.len()); for (path, _) in &v_next { // TODO this printed nothing. What's the size of this vector? println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) From c182c05c2fa96ff9e8e19f77bc30c83b06d3e1c9 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:48:21 -0400 Subject: [PATCH 33/81] error when there are no results to process --- performance/runner/src/calculate.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 003e173d790..b4e7027a93c 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -51,6 +51,8 @@ pub enum TestError { FilenameNotUnicodeErr(PathBuf), #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadFileContentsErr(PathBuf, Option), + #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] + NoResultsErr(PathBuf), } fn regression(baseline: &Measurement, latest: &Measurement) -> Option { @@ -144,6 +146,12 @@ fn calculate_regressions( pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { + // exit early with an Err if there are no results to process + match v[..] 
{ + [] => Err(TestError::NoResultsErr(results_directory.clone())), + _ => Ok(()) + }?; + let v_next: Vec<(PathBuf, Measurement)> = v .into_iter() // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` @@ -151,7 +159,6 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE .collect(); println!("checking regressions with the following measurements:"); - println!("length: {}", &v_next.len()); for (path, _) in &v_next { // TODO this printed nothing. What's the size of this vector? println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) @@ -209,6 +216,11 @@ Filepath: dummy/path/no_file/"# Filepath: dummy/path/filenotexist.json Originating Exception:None"# ), + ( + TestError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"NoResultsErr: The results directory has no json files in it. +Filepath: dummy/path/no_file/"# + ), ]; for (err, msg) in pairs { From 37e86257f55a57798d920cfc877e5251614e70cc Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:57:19 -0400 Subject: [PATCH 34/81] point to the downloaded artifacts correctly --- .github/workflows/performance.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index cb994c7f53d..f57683370e7 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -101,7 +101,7 @@ jobs: python-version: '3.8' - name: move repo up a level run: mkdir ${{ github.workspace }}/../latest/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../latest - - name: debug line + - name: "[debug] ls new dbt location" run: ls ${{ github.workspace }}/../latest/dbt/ # installation creates egg-links so we have to preserve source - name: install dbt from new location @@ -132,18 +132,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - uses: actions/download-artifact@v2 with: name: dev-results - uses: actions/download-artifact@v2 with: name: latest-results - - name: ls result files + - name: "[debug] ls result files" run: ls - uses: actions/download-artifact@v2 with: @@ -151,4 +146,4 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run calculation - run: ./runner calculate -r performance/results/ + run: ./runner calculate -r ./ From 80244a09fe41189a5361a70504668656c8421ee8 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 14:00:30 -0400 Subject: [PATCH 35/81] add error for groups that are not two elements large --- performance/runner/src/calculate.rs | 47 ++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index b4e7027a93c..14c65e4dae9 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -33,7 +33,7 @@ pub struct Regression { } #[derive(Debug, Clone, PartialEq)] -struct MeasurementGroup { +pub struct MeasurementGroup { version: String, run: String, measurement: Measurement, @@ -53,6 +53,10 @@ pub enum TestError { BadFileContentsErr(PathBuf, Option), #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] NoResultsErr(PathBuf), + #[error("OddResultsCountErr: The results directory has an odd number of results in it. 
Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())] + OddResultsCountErr(usize, PathBuf), + #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] + BadGroupSizeErr(usize, Vec), } fn regression(baseline: &Measurement, latest: &Measurement) -> Option { @@ -88,7 +92,6 @@ fn measurements_from_files( .map_or(false, |ext| ext.ends_with("json")) }) .map(|filename| { - println!("{:?}", filename); fs::read_to_string(&filename) .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), Some(e)))) .and_then(|contents| { @@ -136,10 +139,13 @@ fn calculate_regressions( .into_iter() .map(|(_, group)| { let g: Vec = group.collect(); - regression(&g[0].measurement, &g[1].measurement) + match g.len() { + 2 => regression(&g[0].measurement, &g[1].measurement).map(Ok), + i => Some(Err(TestError::BadGroupSizeErr(i, g))), + } }) .flatten() - .collect(); + .collect::, TestError>>()?; Ok(x) } @@ -147,9 +153,10 @@ fn calculate_regressions( pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process - match v[..] { - [] => Err(TestError::NoResultsErr(results_directory.clone())), - _ => Ok(()) + match v.len() { + 0 => Err(TestError::NoResultsErr(results_directory.clone())), + i if i % 2 == 1 => Err(TestError::OddResultsCountErr(i, results_directory.clone())), + _ => Ok(()), }?; let v_next: Vec<(PathBuf, Measurement)> = v @@ -221,6 +228,32 @@ Originating Exception:None"# r#"NoResultsErr: The results directory has no json files in it. Filepath: dummy/path/no_file/"# ), + ( + TestError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), + r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. +File Count: 3 +Filepath: dummy/path/no_file/"# + ), + ( + TestError::BadGroupSizeErr(1, vec![MeasurementGroup { + version: "dev".to_owned(), + run: "some command".to_owned(), + measurement: Measurement { + command: "some command".to_owned(), + mean: 1.0, + stddev: 1.0, + median: 1.0, + user: 1.0, + system: 1.0, + min: 1.0, + max: 1.0, + times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], + } + }]), + r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. 
+Count: 1 +Group: [("dev", "some command")]"# + ), ]; for (err, msg) in pairs { From faff8c00b3880eb3ef79424fcfa949747c14fdad Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 14:37:49 -0400 Subject: [PATCH 36/81] group by run only --- performance/runner/src/calculate.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 14c65e4dae9..f755b0c3f1d 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -121,7 +121,7 @@ fn calculate_regressions( let parts: Vec<&str> = name.split("_").collect(); MeasurementGroup { version: parts[0].to_owned(), - run: parts[1..].join(""), + run: parts[1..].join("_"), measurement: m.clone(), } }) @@ -135,7 +135,7 @@ fn calculate_regressions( let x: Vec = sorted_measurement_groups .into_iter() - .group_by(|x| (x.run.clone(), x.version.clone())) + .group_by(|x| x.run.clone()) .into_iter() .map(|(_, group)| { let g: Vec = group.collect(); From 757614d57f8769d7fd9e3c0f9ba2537236b28433 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 14:59:21 -0400 Subject: [PATCH 37/81] add stddev regression --- performance/runner/src/calculate.rs | 69 ++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index f755b0c3f1d..d0cfc378903 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -25,11 +25,19 @@ struct Measurements { } #[derive(Debug, Clone, PartialEq)] -pub struct Regression { - threshold: f64, - difference: f64, - baseline: Measurement, - latest: Measurement, +pub enum Regression { + Median { + threshold: f64, + difference: f64, + baseline: f64, + latest: f64, + }, + StandardDeviation { + threshold: f64, + difference: f64, + baseline: f64, + latest: f64, + }, } #[derive(Debug, Clone, PartialEq)] @@ -59,19 +67,38 @@ pub enum TestError { BadGroupSizeErr(usize, Vec), } -fn regression(baseline: &Measurement, latest: &Measurement) -> Option { - let threshold = 1.05; // 5% regression threshold - let difference = latest.median / baseline.median; - if difference > threshold { - Some(Regression { - threshold, - difference, - baseline: baseline.clone(), - latest: latest.clone(), - }) - } else { - None - } +fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { + let median_threshold = 1.05; // 5% regression threshold + let median_difference = latest.median / baseline.median; + + let stddev_threshold = 1.05; // 5% regression threshold + let stddev_difference = latest.stddev / baseline.stddev; + + let mut regressions = vec![]; + + if median_difference > median_threshold { + regressions.push( + Regression::Median { + threshold: median_threshold, + difference: median_difference, + baseline: baseline.median.clone(), + latest: latest.median.clone(), + } + ) + }; + + if stddev_difference > stddev_threshold { + regressions.push( + Regression::StandardDeviation { + threshold: stddev_threshold, + difference: stddev_difference, + baseline: baseline.stddev.clone(), + latest: latest.stddev.clone(), + } + ) + }; + + regressions } // given a directory, read all files in the directory and return each @@ -140,8 +167,8 @@ fn calculate_regressions( .map(|(_, group)| { let g: Vec = group.collect(); match g.len() { - 2 => regression(&g[0].measurement, &g[1].measurement).map(Ok), - i => Some(Err(TestError::BadGroupSizeErr(i, g))), + 2 => regression(&g[0].measurement, 
&g[1].measurement).into_iter().map(Ok).collect(), + i => vec![Err(TestError::BadGroupSizeErr(i, g))], } }) .flatten() @@ -180,7 +207,7 @@ pub fn exit_properly(regressions: Vec) { [] => println!("congrats! no regressions"), _ => { for r in regressions { - println!("{:?}", r); + println!("{:#?}", r); } println!("the above regressions were found."); exit(1) From ed91ded2c1dc140163c3291797e973db0f596aee Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 15:04:05 -0400 Subject: [PATCH 38/81] branches named dev and baseline --- .github/workflows/performance.yml | 18 +++++++++--------- performance/runner/src/calculate.rs | 14 +++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index f57683370e7..69a27e65af3 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -86,9 +86,9 @@ jobs: # run the performance measurements on the latest release branch # this part takes by far the longest, so we do everything we can first # so the job fails fast. - measure-latest: + measure-baseline: needs: [build-runner] - name: Measure Latest Branch + name: Measure Baseline Branch runs-on: ubuntu-latest steps: - name: checkout latest @@ -100,12 +100,12 @@ jobs: with: python-version: '3.8' - name: move repo up a level - run: mkdir ${{ github.workspace }}/../latest/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../latest + run: mkdir ${{ github.workspace }}/../baseline/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../baseline - name: "[debug] ls new dbt location" - run: ls ${{ github.workspace }}/../latest/dbt/ + run: ls ${{ github.workspace }}/../baseline/dbt/ # installation creates egg-links so we have to preserve source - name: install dbt from new location - run: cd ${{ github.workspace }}/../latest/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt + run: cd ${{ github.workspace }}/../baseline/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt # checkout the current branch to get all the target projects # this deletes the old checked out code which is why we had to copy before - name: checkout dev @@ -118,16 +118,16 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner measure -b latest -p ${{ github.workspace }}/performance/projects/ + run: ./runner measure -b baseline -p ${{ github.workspace }}/performance/projects/ - uses: actions/upload-artifact@v2 with: - name: latest-results + name: baseline-results path: performance/results/ # detect regressions on the output generated from measuring # the two branches. Exits with non-zero code if a regression is detected. 
calculate-regressions: - needs: [measure-dev, measure-latest] + needs: [measure-dev, measure-baseline] name: Compare Results runs-on: ubuntu-latest steps: @@ -137,7 +137,7 @@ jobs: name: dev-results - uses: actions/download-artifact@v2 with: - name: latest-results + name: baseline-results - name: "[debug] ls result files" run: ls - uses: actions/download-artifact@v2 diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index d0cfc378903..59315c26ad8 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -30,13 +30,13 @@ pub enum Regression { threshold: f64, difference: f64, baseline: f64, - latest: f64, + dev: f64, }, StandardDeviation { threshold: f64, difference: f64, baseline: f64, - latest: f64, + dev: f64, }, } @@ -67,12 +67,12 @@ pub enum TestError { BadGroupSizeErr(usize, Vec), } -fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { +fn regression(baseline: &Measurement, dev: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold - let median_difference = latest.median / baseline.median; + let median_difference = dev.median / baseline.median; let stddev_threshold = 1.05; // 5% regression threshold - let stddev_difference = latest.stddev / baseline.stddev; + let stddev_difference = dev.stddev / baseline.stddev; let mut regressions = vec![]; @@ -82,7 +82,7 @@ fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { threshold: median_threshold, difference: median_difference, baseline: baseline.median.clone(), - latest: latest.median.clone(), + dev: dev.median.clone(), } ) }; @@ -93,7 +93,7 @@ fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { threshold: stddev_threshold, difference: stddev_difference, baseline: baseline.stddev.clone(), - latest: latest.stddev.clone(), + dev: dev.stddev.clone(), } ) }; From fe24dd43d4dda8a5fa4d05ada918f8b432f1bc9a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:10:16 -0400 Subject: [PATCH 39/81] return all calculations not just regressions --- performance/runner/src/calculate.rs | 68 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 59315c26ad8..2a10d88a4c4 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -25,19 +25,18 @@ struct Measurements { } #[derive(Debug, Clone, PartialEq)] -pub enum Regression { - Median { - threshold: f64, - difference: f64, - baseline: f64, - dev: f64, - }, - StandardDeviation { - threshold: f64, - difference: f64, - baseline: f64, - dev: f64, - }, +struct Data { + threshold: f64, + difference: f64, + baseline: f64, + dev: f64, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Calculation { + metric: String, + regression: bool, + data: Data, } #[derive(Debug, Clone, PartialEq)] @@ -67,38 +66,35 @@ pub enum TestError { BadGroupSizeErr(usize, Vec), } -fn regression(baseline: &Measurement, dev: &Measurement) -> Vec { +fn calculate(metric: &str, baseline: &Measurement, dev: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; let stddev_threshold = 1.05; // 5% regression threshold let stddev_difference = dev.stddev / baseline.stddev; - let mut regressions = vec![]; - - if median_difference > median_threshold { - regressions.push( - Regression::Median { + vec![ + Calculation { + metric: ["median", metric].join("_"), + 
regression: median_difference > median_threshold, + data: Data { threshold: median_threshold, difference: median_difference, baseline: baseline.median.clone(), dev: dev.median.clone(), } - ) - }; - - if stddev_difference > stddev_threshold { - regressions.push( - Regression::StandardDeviation { + }, + Calculation { + metric: ["stddev", metric].join("_"), + regression: stddev_difference > stddev_threshold, + data: Data { threshold: stddev_threshold, difference: stddev_difference, baseline: baseline.stddev.clone(), dev: dev.stddev.clone(), } - ) - }; - - regressions + }, + ] } // given a directory, read all files in the directory and return each @@ -134,7 +130,7 @@ fn measurements_from_files( // measurements together by filename fn calculate_regressions( measurements: &[(PathBuf, Measurement)], -) -> Result, TestError> { +) -> Result, TestError> { let mut measurement_groups: Vec = measurements .into_iter() .map(|(p, m)| { @@ -160,24 +156,24 @@ fn calculate_regressions( // locking up mutation let sorted_measurement_groups = measurement_groups; - let x: Vec = sorted_measurement_groups + let x: Vec = sorted_measurement_groups .into_iter() .group_by(|x| x.run.clone()) .into_iter() .map(|(_, group)| { let g: Vec = group.collect(); match g.len() { - 2 => regression(&g[0].measurement, &g[1].measurement).into_iter().map(Ok).collect(), + 2 => calculate(&g[0].run, &g[0].measurement, &g[1].measurement).into_iter().map(Ok).collect(), i => vec![Err(TestError::BadGroupSizeErr(i, g))], } }) .flatten() - .collect::, TestError>>()?; + .collect::, TestError>>()?; Ok(x) } -pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { +pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process match v.len() { @@ -202,13 +198,15 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE }) } -pub fn exit_properly(regressions: Vec) { +pub fn exit_properly(calculations: Vec) { + let regressions = calculations.into_iter().filter(|c| c.regression).collect::>(); match regressions[..] { [] => println!("congrats! 
no regressions"), _ => { for r in regressions { println!("{:#?}", r); } + println!(""); println!("the above regressions were found."); exit(1) } From 873e9714f8f8d447be009e6a75c22189ed38f198 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:22:02 -0400 Subject: [PATCH 40/81] move io operations to main --- performance/runner/src/calculate.rs | 67 ++++++++++------------------- performance/runner/src/main.rs | 32 ++++++++++++-- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 2a10d88a4c4..655a4498609 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -7,43 +7,43 @@ use std::borrow::Cow; use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -struct Measurement { - command: String, - mean: f64, - stddev: f64, - median: f64, - user: f64, - system: f64, - min: f64, - max: f64, - times: Vec, +pub struct Measurement { + pub command: String, + pub mean: f64, + pub stddev: f64, + pub median: f64, + pub user: f64, + pub system: f64, + pub min: f64, + pub max: f64, + pub times: Vec, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -struct Measurements { - results: Vec, +pub struct Measurements { + pub results: Vec, } #[derive(Debug, Clone, PartialEq)] -struct Data { - threshold: f64, - difference: f64, - baseline: f64, - dev: f64, +pub struct Data { + pub threshold: f64, + pub difference: f64, + pub baseline: f64, + pub dev: f64, } #[derive(Debug, Clone, PartialEq)] pub struct Calculation { - metric: String, - regression: bool, - data: Data, + pub metric: String, + pub regression: bool, + pub data: Data, } #[derive(Debug, Clone, PartialEq)] pub struct MeasurementGroup { - version: String, - run: String, - measurement: Measurement, + pub version: String, + pub run: String, + pub measurement: Measurement, } #[derive(Error, Debug)] @@ -188,31 +188,10 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Test .map(|(p, ms)| (p, ms.results[0].clone())) .collect(); - println!("checking regressions with the following measurements:"); - for (path, _) in &v_next { - // TODO this printed nothing. What's the size of this vector? - println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) - } - calculate_regressions(&v_next) }) } -pub fn exit_properly(calculations: Vec) { - let regressions = calculations.into_iter().filter(|c| c.regression).collect::>(); - match regressions[..] { - [] => println!("congrats! no regressions"), - _ => { - for r in regressions { - println!("{:#?}", r); - } - println!(""); - println!("the above regressions were found."); - exit(1) - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index bd4eff6f832..bdbd48e8951 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -3,6 +3,7 @@ extern crate structopt; mod calculate; mod measure; +use crate::calculate::Calculation; use std::fmt::Display; use std::path::PathBuf; use std::process::exit; @@ -37,6 +38,8 @@ fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> } } +// This is where all the printing, exiting, and error displaying +// should happen. Module functions should only return values. 
fn main() { match Opt::from_args() { Opt::Measure { projects_dir, branch_name } => { @@ -46,10 +49,33 @@ fn main() { ) }, Opt::Calculate { results_dir } => { - gracefully_exit_or( - &calculate::exit_properly, + let calculations = gracefully_exit_or( + &|x| x, calculate::regressions(&results_dir) - ) + ); + + println!(":: All Calculations ::\n"); + for c in &calculations { + println!("{:#?}", c); + } + println!(""); + + let regressions: Vec = calculations + .into_iter() + .filter(|c| c.regression) + .collect(); + + match regressions[..] { + [] => println!("congrats! no regressions :)"), + _ => { + println!(":: Regressions Found ::\n"); + for r in regressions { + println!("{:#?}", r); + } + println!(""); + exit(1) + } + } }, } } From 3004969a933275ca8e2b09157a9cc52a0285c68a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:26:29 -0400 Subject: [PATCH 41/81] move measure io to main --- performance/runner/src/calculate.rs | 2 -- performance/runner/src/main.rs | 21 ++++++++++++++++++--- performance/runner/src/measure.rs | 14 +------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 655a4498609..827ae2f6228 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -1,9 +1,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use std::process::exit; use std::{fs, io}; -use std::borrow::Cow; use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index bdbd48e8951..8980bf23ea3 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -42,12 +42,26 @@ fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> // should happen. Module functions should only return values. fn main() { match Opt::from_args() { + Opt::Measure { projects_dir, branch_name } => { - gracefully_exit_or( - &measure::proper_exit, + let statuses = gracefully_exit_or( + &|x| x, measure::measure(&projects_dir, &branch_name) - ) + ); + + for status in statuses { + match status.code() { + None => (), + Some(0) => (), + // if any child command exited with a non zero status code, exit with the same one here. + Some(nonzero) => { + println!("a child process exited with a nonzero status code."); + exit(nonzero) + }, + } + } }, + Opt::Calculate { results_dir } => { let calculations = gracefully_exit_or( &|x| x, @@ -77,5 +91,6 @@ fn main() { } } }, + } } diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 4bf8cf20f91..91222aa5f1a 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,7 +1,7 @@ extern crate structopt; use std::path::PathBuf; -use std::process::{exit, Command, ExitStatus}; +use std::process::{Command, ExitStatus}; use std::{fs, io}; use structopt::StructOpt; @@ -83,15 +83,3 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result) { - for status in statuses { - match status.code() { - None => (), - Some(0) => (), - // if any child command exited with a non zero status code, exit with the same one here. 
- Some(nonzero) => exit(nonzero), - } - } -} From 4e020c387802b3037acf4797f762ce32bafaaaa4 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:50:08 -0400 Subject: [PATCH 42/81] enforce branch names at calculation time --- performance/runner/src/calculate.rs | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 827ae2f6228..42dbbf0ade4 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -62,9 +62,11 @@ pub enum TestError { OddResultsCountErr(usize, PathBuf), #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] BadGroupSizeErr(usize, Vec), + #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)] + BadBranchNameErr(String, String), } -fn calculate(metric: &str, baseline: &Measurement, dev: &Measurement) -> Vec { +fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; @@ -159,9 +161,20 @@ fn calculate_regressions( .group_by(|x| x.run.clone()) .into_iter() .map(|(_, group)| { - let g: Vec = group.collect(); + let mut g: Vec = group.collect(); + g.sort_by(|x, y| x.version.cmp(&y.version)); + match g.len() { - 2 => calculate(&g[0].run, &g[0].measurement, &g[1].measurement).into_iter().map(Ok).collect(), + 2 => { + let dev = &g[0]; + let baseline = &g[1]; + + if dev.version == "dev" && baseline.version == "baseline" { + calculate(&dev.run, &dev.measurement, &baseline.measurement).into_iter().map(Ok).collect() + } else { + vec![Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone()))] + } + }, i => vec![Err(TestError::BadGroupSizeErr(i, g))], } }) @@ -256,6 +269,14 @@ Filepath: dummy/path/no_file/"# Count: 1 Group: [("dev", "some command")]"# ), + ( + TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()).to_path_buf(), + r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. 
+Found: boop, noop"# + ), + + + ]; for (err, msg) in pairs { From 5891b59790d4c3e9d6d2e36c6a260e2ba9034e0b Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:52:28 -0400 Subject: [PATCH 43/81] better variable names --- performance/runner/src/calculate.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 42dbbf0ade4..9d2318128b7 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -156,18 +156,18 @@ fn calculate_regressions( // locking up mutation let sorted_measurement_groups = measurement_groups; - let x: Vec = sorted_measurement_groups + let calculations: Vec = sorted_measurement_groups .into_iter() .group_by(|x| x.run.clone()) .into_iter() - .map(|(_, group)| { - let mut g: Vec = group.collect(); - g.sort_by(|x, y| x.version.cmp(&y.version)); + .map(|(_, g)| { + let mut groups: Vec = g.collect(); + groups.sort_by(|x, y| x.version.cmp(&y.version)); - match g.len() { + match groups.len() { 2 => { - let dev = &g[0]; - let baseline = &g[1]; + let dev = &groups[0]; + let baseline = &groups[1]; if dev.version == "dev" && baseline.version == "baseline" { calculate(&dev.run, &dev.measurement, &baseline.measurement).into_iter().map(Ok).collect() @@ -175,13 +175,13 @@ fn calculate_regressions( vec![Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone()))] } }, - i => vec![Err(TestError::BadGroupSizeErr(i, g))], + i => vec![Err(TestError::BadGroupSizeErr(i, groups))], } }) .flatten() .collect::, TestError>>()?; - Ok(x) + Ok(calculations) } pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { From 935c1387361014625532b554de1f672660deea1f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:03:37 -0400 Subject: [PATCH 44/81] type gymnastics --- performance/runner/src/calculate.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 9d2318128b7..3f0f147af68 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -166,20 +166,20 @@ fn calculate_regressions( match groups.len() { 2 => { - let dev = &groups[0]; - let baseline = &groups[1]; + let dev = &groups[1]; + let baseline = &groups[0]; if dev.version == "dev" && baseline.version == "baseline" { - calculate(&dev.run, &dev.measurement, &baseline.measurement).into_iter().map(Ok).collect() + Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement)) } else { - vec![Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone()))] + Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) } }, - i => vec![Err(TestError::BadGroupSizeErr(i, groups))], + i => Err(TestError::BadGroupSizeErr(i, groups)), } }) - .flatten() - .collect::, TestError>>()?; + .collect::>, TestError>>()? 
+ .concat(); Ok(calculations) } From 64bf9c8885caf5c77fcf4dee9b1af9b61bab7c51 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:04:13 -0400 Subject: [PATCH 45/81] test fix --- performance/runner/src/calculate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 3f0f147af68..03b2a24b32e 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -270,7 +270,7 @@ Count: 1 Group: [("dev", "some command")]"# ), ( - TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()).to_path_buf(), + TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. Found: boop, noop"# ), From 252280b56e1ab64364e9164ab434408c603d3dc6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:34:10 -0400 Subject: [PATCH 46/81] write out results as artifact --- .github/workflows/performance.yml | 4 ++++ performance/runner/src/calculate.rs | 4 ++-- performance/runner/src/main.rs | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 69a27e65af3..6d94f2954d7 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -147,3 +147,7 @@ jobs: run: chmod +x ./runner - name: run calculation run: ./runner calculate -r ./ + - uses: actions/upload-artifact@v2 + with: + name: final-calculations + path: ./final_calculations.json diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 03b2a24b32e..08a0de961bd 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -22,7 +22,7 @@ pub struct Measurements { pub results: Vec, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Data { pub threshold: f64, pub difference: f64, @@ -30,7 +30,7 @@ pub struct Data { pub dev: f64, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Calculation { pub metric: String, pub regression: bool, diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 8980bf23ea3..61efee5a189 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -5,6 +5,8 @@ mod measure; use crate::calculate::Calculation; use std::fmt::Display; +use std::fs::File; +use std::io::Write; use std::path::PathBuf; use std::process::exit; use structopt::StructOpt; @@ -68,17 +70,30 @@ fn main() { calculate::regressions(&results_dir) ); + // print calculations to stdout println!(":: All Calculations ::\n"); for c in &calculations { - println!("{:#?}", c); + println!("{:#?}\n", c); } println!(""); + // write calculations to file so it can be downloaded as an artifact + let json_calcs = serde_json::to_string(&calculations) + .expect("failed to serialize calculations to json"); + + let outfile = &mut results_dir.into_os_string(); + outfile.push("/final_calculations.json"); + + let mut f = File::create(outfile).expect("Unable to create file"); + f.write_all(json_calcs.as_bytes()).expect("unable to write data"); + + // filter for regressions let regressions: Vec = calculations .into_iter() .filter(|c| c.regression) .collect(); + // exit with non zero exit code if there are regressions match regressions[..] { [] => println!("congrats! 
no regressions :)"), _ => { From bad0198a362bb305b4f6fdd01708b5ab95d130e2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:55:03 -0400 Subject: [PATCH 47/81] minor readme changes --- performance/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/performance/README.md b/performance/README.md index 8b9ce3058ab..aa680b6bae3 100644 --- a/performance/README.md +++ b/performance/README.md @@ -1,13 +1,14 @@ -# Performance Testing +# Performance Regression Testing This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually. -## Adding a new project -Just make a new directory under projects. It will automatically be picked up by the tests. +## Adding a new dbt project +Just make a new directory under `performance/projects/`. It will automatically be picked up by the tests. ## Adding a new dbt command -In `runner/src/main.rs` add a metric to the `metrics` Vec in the main function. The Github Action will handle recompilation. +In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Github Action will handle recompilation if you don't have the rust toolchain installed. ## Future work - add more projects to test different configurations that have been known bottlenecks - add more dbt commands to measure -- consider storing these results so they can be graphed over time +- possibly using the uploaded json artifacts to store these results so they can be graphed over time +- reading new metrics from a file so no one has to edit rust source to add them to the suite From 580b1fdd688e3fe7fa9fc01a459e1f946d8bddae Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:56:30 -0400 Subject: [PATCH 48/81] minor print change --- performance/runner/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 61efee5a189..e3e07b7117a 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -98,8 +98,9 @@ fn main() { [] => println!("congrats! no regressions :)"), _ => { println!(":: Regressions Found ::\n"); + println!(""); for r in regressions { - println!("{:#?}", r); + println!("{:#?}\n", r); } println!(""); exit(1) From 1f189f522556ce54dd2b4caf35606151f3cf6652 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:59:53 -0400 Subject: [PATCH 49/81] added small paragraph to readme --- performance/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/performance/README.md b/performance/README.md index aa680b6bae3..341e9da4333 100644 --- a/performance/README.md +++ b/performance/README.md @@ -1,6 +1,8 @@ # Performance Regression Testing This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually. +The github workflow hardcodes our baseline branch for performance metrics as `0.20.latest`. As future versions become faster, this branch will be updated to hold us to those new standards. + ## Adding a new dbt project Just make a new directory under `performance/projects/`. 
It will automatically be picked up by the tests. From 486afa9fcd2728eb8353241a8c43da671f374ac3 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 18:04:22 -0400 Subject: [PATCH 50/81] upload results even when regressions are detected --- .github/workflows/performance.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 6d94f2954d7..01fa3855e5f 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -147,7 +147,9 @@ jobs: run: chmod +x ./runner - name: run calculation run: ./runner calculate -r ./ + # always attempt to upload the results even if there were regressions found - uses: actions/upload-artifact@v2 + if: ${{ always() }} with: name: final-calculations path: ./final_calculations.json From ba9d76b3f9fe641a883fca24dd71a9b48f72abc2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 10:11:09 -0400 Subject: [PATCH 51/81] make final json human readable --- performance/runner/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index e3e07b7117a..5c289bf3a27 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -78,7 +78,7 @@ fn main() { println!(""); // write calculations to file so it can be downloaded as an artifact - let json_calcs = serde_json::to_string(&calculations) + let json_calcs = serde_json::to_string_pretty(&calculations) .expect("failed to serialize calculations to json"); let outfile = &mut results_dir.into_os_string(); From 287c4d2b031814de0e49361cfbd0b39f75de94cf Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 12:15:29 -0400 Subject: [PATCH 52/81] revamp exception hierarchy --- performance/runner/src/calculate.rs | 140 ++++----------------------- performance/runner/src/exceptions.rs | 140 +++++++++++++++++++++++++++ performance/runner/src/main.rs | 1 + performance/runner/src/measure.rs | 29 ++++-- 4 files changed, 179 insertions(+), 131 deletions(-) create mode 100644 performance/runner/src/exceptions.rs diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 08a0de961bd..3f2f5f52cba 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -1,8 +1,8 @@ +use crate::exceptions::{CalculateError, IOError}; use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use std::{fs, io}; -use thiserror::Error; +use std::fs; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurement { @@ -44,28 +44,6 @@ pub struct MeasurementGroup { pub measurement: Measurement, } -#[derive(Error, Debug)] -pub enum TestError { - #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] - BadJSONErr(PathBuf, Option), - #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] - ReadErr(PathBuf, Option), - #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] - MissingFilenameErr(PathBuf), - #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. 
Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] - FilenameNotUnicodeErr(PathBuf), - #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] - BadFileContentsErr(PathBuf, Option), - #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] - NoResultsErr(PathBuf), - #[error("OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())] - OddResultsCountErr(usize, PathBuf), - #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] - BadGroupSizeErr(usize, Vec), - #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)] - BadBranchNameErr(String, String), -} - fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; @@ -101,9 +79,10 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Result, TestError> { +) -> Result, CalculateError> { let result_files = fs::read_dir(results_directory) - .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), Some(e))))?; + .or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; result_files .into_iter() @@ -116,10 +95,11 @@ fn measurements_from_files( }) .map(|filename| { fs::read_to_string(&filename) - .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), Some(e)))) + .or_else(|e| Err(IOError::BadFileContentsErr(filename.clone(), Some(e)))) + .or_else(|e| Err(CalculateError::CalculateIOError(e))) .and_then(|contents| { serde_json::from_str::(&contents) - .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), Some(e)))) + .or_else(|e| Err(CalculateError::BadJSONErr(filename.clone(), Some(e)))) }) .map(|m| (filename, m)) }) @@ -130,15 +110,15 @@ fn measurements_from_files( // measurements together by filename fn calculate_regressions( measurements: &[(PathBuf, Measurement)], -) -> Result, TestError> { +) -> Result, CalculateError> { let mut measurement_groups: Vec = measurements .into_iter() .map(|(p, m)| { p.file_name() - .ok_or_else(|| TestError::MissingFilenameErr(p.to_path_buf())) + .ok_or_else(|| IOError::MissingFilenameErr(p.to_path_buf())) .and_then(|name| { name.to_str() - .ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf())) + .ok_or_else(|| IOError::FilenameNotUnicodeErr(p.to_path_buf())) }) .map(|name| { let parts: Vec<&str> = name.split("_").collect(); @@ -149,7 +129,8 @@ fn calculate_regressions( } }) }) - .collect::, TestError>>()?; + .collect::, IOError>>() + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; measurement_groups.sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); @@ -172,24 +153,24 @@ fn calculate_regressions( if dev.version == "dev" && baseline.version == "baseline" { Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement)) } else { - Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) + Err(CalculateError::BadBranchNameErr(baseline.version.clone(), 
dev.version.clone())) } }, - i => Err(TestError::BadGroupSizeErr(i, groups)), + i => Err(CalculateError::BadGroupSizeErr(i, groups)), } }) - .collect::>, TestError>>()? + .collect::>, CalculateError>>()? .concat(); Ok(calculations) } -pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { +pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process match v.len() { - 0 => Err(TestError::NoResultsErr(results_directory.clone())), - i if i % 2 == 1 => Err(TestError::OddResultsCountErr(i, results_directory.clone())), + 0 => Err(CalculateError::NoResultsErr(results_directory.clone())), + i if i % 2 == 1 => Err(CalculateError::OddResultsCountErr(i, results_directory.clone())), _ => Ok(()), }?; @@ -202,86 +183,3 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Test calculate_regressions(&v_next) }) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_error_messages() { - let pairs = vec![ - ( - TestError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), - r#"BadJSONErr: JSON in file cannot be deserialized as expected. -Filepath: dummy/path/file.json -Originating Exception:None"# - ), - ( - TestError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), - r#"ReadErr: The file cannot be read. -Filepath: dummy/path/file.json -Originating Exception:None"# - ), - ( - TestError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), - r#"MissingFilenameErr: The path provided does not specify a file. -Filepath: dummy/path/no_file/"# - ), - ( - TestError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), - r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. -Filepath: dummy/path/no_file/"# - ), - ( - TestError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), - r#"BadFileContentsErr: Check that the file exists and is readable. -Filepath: dummy/path/filenotexist.json -Originating Exception:None"# - ), - ( - TestError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), - r#"NoResultsErr: The results directory has no json files in it. -Filepath: dummy/path/no_file/"# - ), - ( - TestError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), - r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. -File Count: 3 -Filepath: dummy/path/no_file/"# - ), - ( - TestError::BadGroupSizeErr(1, vec![MeasurementGroup { - version: "dev".to_owned(), - run: "some command".to_owned(), - measurement: Measurement { - command: "some command".to_owned(), - mean: 1.0, - stddev: 1.0, - median: 1.0, - user: 1.0, - system: 1.0, - min: 1.0, - max: 1.0, - times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], - } - }]), - r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. -Count: 1 -Group: [("dev", "some command")]"# - ), - ( - TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), - r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. 
-Found: boop, noop"# - ), - - - - ]; - - for (err, msg) in pairs { - assert_eq!(format!("{}", err), msg) - } - } - -} diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs new file mode 100644 index 00000000000..6166a59dd50 --- /dev/null +++ b/performance/runner/src/exceptions.rs @@ -0,0 +1,140 @@ +use crate::calculate::*; +use std::path::PathBuf; +use std::io; +use std::path::Path; +use thiserror::Error; + + +#[derive(Debug, Error)] +pub enum IOError { + #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + ReadErr(PathBuf, Option), + #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + MissingFilenameErr(PathBuf), + #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + FilenameNotUnicodeErr(PathBuf), + #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadFileContentsErr(PathBuf, Option), + #[error("CommandErr: System command failed to run.\nOriginating Exception: {}", .0.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + CommandErr(Option), +} + + +#[derive(Debug, Error)] +pub enum CalculateError { + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadJSONErr(PathBuf, Option), + #[error("{}", .0)] + CalculateIOError(IOError), + #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] + NoResultsErr(PathBuf), + #[error("OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())] + OddResultsCountErr(usize, PathBuf), + #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] + BadGroupSizeErr(usize, Vec), + #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)] + BadBranchNameErr(String, String), +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_io_error_messages() { + let pairs = vec![ + ( + IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"ReadErr: The file cannot be read. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"MissingFilenameErr: The path provided does not specify a file. +Filepath: dummy/path/no_file/"# + ), + ( + IOError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. +Filepath: dummy/path/no_file/"# + ), + ( + IOError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), + r#"BadFileContentsErr: Check that the file exists and is readable. 
+Filepath: dummy/path/filenotexist.json +Originating Exception:None"# + ), + ( + IOError::CommandErr(None), + r#"CommandErr: System command failed to run. +Originating Exception: None"# + ), + ]; + + for (err, msg) in pairs { + assert_eq!(format!("{}", err), msg) + } + } + + + #[test] + fn test_calculate_error_messages() { + let pairs = vec![ + ( + CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"BadJSONErr: JSON in file cannot be deserialized as expected. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"BadJSONErr: JSON in file cannot be deserialized as expected. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"NoResultsErr: The results directory has no json files in it. +Filepath: dummy/path/no_file/"# + ), + ( + CalculateError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), + r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. +File Count: 3 +Filepath: dummy/path/no_file/"# + ), + ( + CalculateError::BadGroupSizeErr(1, vec![MeasurementGroup { + version: "dev".to_owned(), + run: "some command".to_owned(), + measurement: Measurement { + command: "some command".to_owned(), + mean: 1.0, + stddev: 1.0, + median: 1.0, + user: 1.0, + system: 1.0, + min: 1.0, + max: 1.0, + times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], + } + }]), + r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. +Count: 1 +Group: [("dev", "some command")]"# + ), + ( + CalculateError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), + r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. +Found: boop, noop"# + ), + ]; + + for (err, msg) in pairs { + assert_eq!(format!("{}", err), msg) + } + } + +} diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 5c289bf3a27..1f9482cb7dd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,6 +1,7 @@ extern crate structopt; mod calculate; +mod exceptions; mod measure; use crate::calculate::Calculation; diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 91222aa5f1a..c15d30f1ae5 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,8 +1,9 @@ extern crate structopt; +use crate::exceptions::IOError; use std::path::PathBuf; use std::process::{Command, ExitStatus}; -use std::{fs, io}; +use std::fs; use structopt::StructOpt; #[derive(Clone, Debug, StructOpt)] @@ -38,7 +39,7 @@ impl Metric<'_> { } // calls hyperfine via system command, and returns result of runs -pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, io::Error> { +pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { // to add a new metric to the test suite, simply define it in this list: // TODO read from some config file? 
let metrics: Vec = vec![ @@ -49,18 +50,25 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result Result>>() + .collect::>>() }) .flatten() .collect() From 2f9907b07291c2814298529d918f3c0ea1247ee7 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 12:34:57 -0400 Subject: [PATCH 53/81] remove one more unwrap --- performance/runner/src/calculate.rs | 24 ++++++++++++++++-------- performance/runner/src/exceptions.rs | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 3f2f5f52cba..7fdf7135564 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -3,6 +3,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::fs; +use std::fs::DirEntry; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurement { @@ -86,22 +87,29 @@ fn measurements_from_files( result_files .into_iter() - .map(|f| f.unwrap().path()) - .filter(|filename| { - filename + .map(|entry| { + let ent: DirEntry = entry.or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; + + Ok(ent.path()) + }) + .collect::, CalculateError>>()? + .into_iter() + .filter(|path| { + path .extension() .and_then(|ext| ext.to_str()) .map_or(false, |ext| ext.ends_with("json")) }) - .map(|filename| { - fs::read_to_string(&filename) - .or_else(|e| Err(IOError::BadFileContentsErr(filename.clone(), Some(e)))) + .map(|path| { + fs::read_to_string(&path) + .or_else(|e| Err(IOError::BadFileContentsErr(path.clone(), Some(e)))) .or_else(|e| Err(CalculateError::CalculateIOError(e))) .and_then(|contents| { serde_json::from_str::(&contents) - .or_else(|e| Err(CalculateError::BadJSONErr(filename.clone(), Some(e)))) + .or_else(|e| Err(CalculateError::BadJSONErr(path.clone(), Some(e)))) }) - .map(|m| (filename, m)) + .map(|m| (path, m)) }) .collect() } diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 6166a59dd50..66433db5c6d 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -1,6 +1,7 @@ use crate::calculate::*; use std::path::PathBuf; use std::io; +#[cfg(test)] use std::path::Path; use thiserror::Error; From 3c8daacd3ed3f2ae8f3fb3cd5062fea633bb478c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 13:12:01 -0400 Subject: [PATCH 54/81] refactor for simpler flow --- performance/runner/src/measure.rs | 76 +++++++++++++++++-------------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index c15d30f1ae5..45ac0700163 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -49,46 +49,52 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result>(); - let path: PathBuf = ent.path(); - let project_name: &str = path - .file_name() - .ok_or_else(|| IOError::MissingFilenameErr(path.clone().to_path_buf())) - .and_then(|x| x.to_str().ok_or_else(|| IOError::FilenameNotUnicodeErr(path.clone().to_path_buf())))?; - - Command::new("hyperfine") - .current_dir(&path) - // warms filesystem caches by running the command first without counting it. 
- // alternatively we could clear them before each run - .arg("--warmup") - .arg("1") - .arg("--prepare") - .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) - .arg("--export-json") - .arg( - ["../../results/", &metric.outfile(&project_name, &dbt_branch)].join(""), - ) - // this prevents capture dbt output. Noisy, but good for debugging when tests fail. - .arg("--show-output") - .status() // use spawn() here instead for more information - .or_else(|e| Err(IOError::CommandErr(Some(e)))) - }) - .collect::>>() + Ok(pairs) }) - .flatten() - .collect() + .collect::>, IOError>>()? + .concat() + .into_iter() + // run hyperfine on each pairing + .map(|(path, project_name, metric)| { + Command::new("hyperfine") + .current_dir(&path) + // warms filesystem caches by running the command first without counting it. + // alternatively we could clear them before each run + .arg("--warmup") + .arg("1") + .arg("--prepare") + .arg(metric.prepare) + .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) + .arg("--export-json") + .arg( + ["../../results/", &metric.outfile(&project_name, &dbt_branch)].join(""), + ) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. + .arg("--show-output") + .status() // use spawn() here instead for more information + .or_else(|e| Err(IOError::CommandErr(Some(e)))) + } + ) + .collect() } From 94c6cf1b3c48452ad97605aa16f281176cc1cc5c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 13:19:39 -0400 Subject: [PATCH 55/81] remove some clones --- performance/runner/src/calculate.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 7fdf7135564..adb09292235 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -59,8 +59,8 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Vec Date: Wed, 4 Aug 2021 13:31:48 -0400 Subject: [PATCH 56/81] minor refactor --- performance/runner/src/calculate.rs | 35 +++++++++++++++++------------ 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index adb09292235..e297aa96157 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -174,20 +174,27 @@ fn calculate_regressions( } pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { - measurements_from_files(Path::new(&results_directory)).and_then(|v| { + measurements_from_files(Path::new(&results_directory)).and_then(|v| match v[..] { // exit early with an Err if there are no results to process - match v.len() { - 0 => Err(CalculateError::NoResultsErr(results_directory.clone())), - i if i % 2 == 1 => Err(CalculateError::OddResultsCountErr(i, results_directory.clone())), - _ => Ok(()), - }?; - - let v_next: Vec<(PathBuf, Measurement)> = v - .into_iter() - // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` - .map(|(p, ms)| (p, ms.results[0].clone())) - .collect(); - - calculate_regressions(&v_next) + [] => { + Err(CalculateError::NoResultsErr(results_directory.clone())) + }, + + _ => { + // we expect two runs for each project-metric pairing: one for each branch, baseline + // and dev. An odd result count is unexpected. 
+            if v.len() % 2 == 1 {
+                Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone()))
+            } else {
+                // otherwise, we can do our comparisons
+                let measurements = v
+                    .into_iter()
+                    // the way we're running these, the files will each contain exactly one measurement, hence `results[0]`
+                    .map(|(p, ms)| (p, ms.results[0].clone()))
+                    .collect::<Vec<(PathBuf, Measurement)>>();
+
+                calculate_regressions(&measurements)
+            }
+        },
     })
 }

From b77eff8f6fcd026e40564e69de7cc2ae3a69858d Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Wed, 4 Aug 2021 13:34:46 -0400
Subject: [PATCH 57/81] turn warnings into errors in ci

---
 .github/workflows/performance.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index 01fa3855e5f..adb3528c03b 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -20,6 +20,9 @@ jobs:
   test-runner:
     name: Test Runner
     runs-on: ubuntu-latest
+    env:
+      # turns warnings into errors
+      RUSTFLAGS: "-D warnings"
     steps:
       - uses: actions/checkout@v2
       - uses: actions-rs/toolchain@v1

From 0b4689f311703b2bd7b4a41bb23c245f8d77373e Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Wed, 4 Aug 2021 15:14:37 -0400
Subject: [PATCH 58/81] address PR feedback

---
 .github/workflows/performance.yml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index adb3528c03b..94d111f5b9e 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -86,9 +86,14 @@ jobs:
           name: dev-results
           path: performance/results/
 
-  # run the performance measurements on the latest release branch
-  # this part takes by far the longest, so we do everything we can first
-  # so the job fails fast.
+  # run the performance measurements on the release branch which we use
+  # as a performance baseline. This part takes by far the longest, so
+  # we do everything we can first so the job fails fast.
+  # -----
+  # we need to checkout dbt twice in this job: once for the baseline dbt
+  # version, and once to get the latest regression testing projects,
+  # metrics, and runner code from the develop or current branch so that
+  # the calculations match for both versions of dbt we are comparing.
measure-baseline: needs: [build-runner] name: Measure Baseline Branch @@ -134,7 +139,6 @@ jobs: name: Compare Results runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - uses: actions/download-artifact@v2 with: name: dev-results From 4b1c6b51f9f2975b5a09ae01f16644cac1cefc2f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 16:39:53 -0400 Subject: [PATCH 59/81] fix spacing --- performance/runner/src/exceptions.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 66433db5c6d..59062c3e3be 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -8,13 +8,13 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum IOError { - #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] ReadErr(PathBuf, Option), #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] MissingFilenameErr(PathBuf), #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] FilenameNotUnicodeErr(PathBuf), - #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadFileContentsErr(PathBuf, Option), #[error("CommandErr: System command failed to run.\nOriginating Exception: {}", .0.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] CommandErr(Option), @@ -23,7 +23,7 @@ pub enum IOError { #[derive(Debug, Error)] pub enum CalculateError { - #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadJSONErr(PathBuf, Option), #[error("{}", .0)] CalculateIOError(IOError), @@ -49,7 +49,7 @@ mod tests { IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"ReadErr: The file cannot be read. Filepath: dummy/path/file.json -Originating Exception:None"# +Originating Exception: None"# ), ( IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), @@ -65,7 +65,7 @@ Filepath: dummy/path/no_file/"# IOError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), r#"BadFileContentsErr: Check that the file exists and is readable. 
Filepath: dummy/path/filenotexist.json -Originating Exception:None"# +Originating Exception: None"# ), ( IOError::CommandErr(None), @@ -87,13 +87,13 @@ Originating Exception: None"# CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception:None"# +Originating Exception: None"# ), ( CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception:None"# +Originating Exception: None"# ), ( CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), From fe0b9e7ef5d2bf6eab960d210ffe820d6ea8e085 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:00:16 -0400 Subject: [PATCH 60/81] refactor to use references better --- performance/runner/src/calculate.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index e297aa96157..52ead4a34ff 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -81,11 +81,9 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Result, CalculateError> { - let result_files = fs::read_dir(results_directory) + fs::read_dir(results_directory) .or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) - .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; - - result_files + .or_else(|e| Err(CalculateError::CalculateIOError(e)))? .into_iter() .map(|entry| { let ent: DirEntry = entry.or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) @@ -94,7 +92,7 @@ fn measurements_from_files( Ok(ent.path()) }) .collect::, CalculateError>>()? - .into_iter() + .iter() .filter(|path| { path .extension() @@ -102,14 +100,14 @@ fn measurements_from_files( .map_or(false, |ext| ext.ends_with("json")) }) .map(|path| { - fs::read_to_string(&path) + fs::read_to_string(path) .or_else(|e| Err(IOError::BadFileContentsErr(path.clone(), Some(e)))) .or_else(|e| Err(CalculateError::CalculateIOError(e))) .and_then(|contents| { serde_json::from_str::(&contents) .or_else(|e| Err(CalculateError::BadJSONErr(path.clone(), Some(e)))) }) - .map(|m| (path, m)) + .map(|m| (path.clone(), m)) }) .collect() } From 02007b36199f5a73944a43665b28dfd8d491a71a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:05:23 -0400 Subject: [PATCH 61/81] more reference handling --- performance/runner/src/measure.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 45ac0700163..f2cce2d3156 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -39,7 +39,7 @@ impl Metric<'_> { } // calls hyperfine via system command, and returns result of runs -pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { +pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { // to add a new metric to the test suite, simply define it in this list: // TODO read from some config file? let metrics: Vec = vec![ @@ -65,20 +65,19 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result>(); + .collect::)>>(); Ok(pairs) }) - .collect::>, IOError>>()? + .collect::)>>, IOError>>()? 
.concat() - .into_iter() + .iter() // run hyperfine on each pairing .map(|(path, project_name, metric)| { Command::new("hyperfine") - .current_dir(&path) + .current_dir(path) // warms filesystem caches by running the command first without counting it. // alternatively we could clear them before each run .arg("--warmup") @@ -88,7 +87,7 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result Date: Wed, 4 Aug 2021 17:33:29 -0400 Subject: [PATCH 62/81] make the happy path use references and clone in the exception paths --- performance/runner/src/calculate.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 52ead4a34ff..40ed654b899 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -118,7 +118,7 @@ fn calculate_regressions( measurements: &[(PathBuf, Measurement)], ) -> Result, CalculateError> { let mut measurement_groups: Vec = measurements - .into_iter() + .iter() .map(|(p, m)| { p.file_name() .ok_or_else(|| IOError::MissingFilenameErr(p.to_path_buf())) @@ -144,11 +144,11 @@ fn calculate_regressions( let sorted_measurement_groups = measurement_groups; let calculations: Vec = sorted_measurement_groups - .into_iter() + .iter() .group_by(|x| x.run.clone()) .into_iter() .map(|(_, g)| { - let mut groups: Vec = g.collect(); + let mut groups: Vec<&MeasurementGroup> = g.collect(); groups.sort_by(|x, y| x.version.cmp(&y.version)); match groups.len() { @@ -162,7 +162,10 @@ fn calculate_regressions( Err(CalculateError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) } }, - i => Err(CalculateError::BadGroupSizeErr(i, groups)), + i => { + let gs: Vec = groups.into_iter().map(|x| x.clone()).collect(); + Err(CalculateError::BadGroupSizeErr(i, gs)) + }, } }) .collect::>, CalculateError>>()? From 44a8f6a3bfdf0278906a6c19893c140ae9bb7999 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:46:45 -0400 Subject: [PATCH 63/81] more reference improvements --- performance/runner/src/calculate.rs | 40 +++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 40ed654b899..2609e70a8fd 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -115,7 +115,7 @@ fn measurements_from_files( // given a list of filename-measurement pairs, detect any regressions by grouping // measurements together by filename fn calculate_regressions( - measurements: &[(PathBuf, Measurement)], + measurements: &[(&PathBuf, &Measurement)], ) -> Result, CalculateError> { let mut measurement_groups: Vec = measurements .iter() @@ -131,7 +131,7 @@ fn calculate_regressions( MeasurementGroup { version: parts[0].to_owned(), run: parts[1..].join("_"), - measurement: m.clone(), + measurement: (*m).clone(), } }) }) @@ -175,27 +175,23 @@ fn calculate_regressions( } pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { - measurements_from_files(Path::new(&results_directory)).and_then(|v| match v[..] { + measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process - [] => { + if v.len() <= 0 { Err(CalculateError::NoResultsErr(results_directory.clone())) - }, - - _ => { - // we expect two runs for each project-metric pairing: one for each branch, baseline - // and dev. An odd result count is unexpected. 
- if v.len() % 2 == 1 { - Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone())) - } else { - // otherwise, we can do our comparisons - let measurements = v - .into_iter() - // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` - .map(|(p, ms)| (p, ms.results[0].clone())) - .collect::>(); - - calculate_regressions(&measurements) - } - }, + // we expect two runs for each project-metric pairing: one for each branch, baseline + // and dev. An odd result count is unexpected. + } else if v.len() % 2 == 1 { + Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone())) + } else { + // otherwise, we can do our comparisons + let measurements = v + .iter() + // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` + .map(|(p, ms)| (p, &ms.results[0])) + .collect::>(); + + calculate_regressions(&measurements[..]) + } }) } From 02839ec77916419f9f9f5b0204c1567efc8bd2bf Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:48:13 -0400 Subject: [PATCH 64/81] iter instead of into_iter --- performance/runner/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 1f9482cb7dd..e02a4f1ce67 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -89,8 +89,8 @@ fn main() { f.write_all(json_calcs.as_bytes()).expect("unable to write data"); // filter for regressions - let regressions: Vec = calculations - .into_iter() + let regressions: Vec<&Calculation> = calculations + .iter() .filter(|c| c.regression) .collect(); From 88acf0727ba18780aa7077688e7859733a732c8d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:50:32 -0400 Subject: [PATCH 65/81] remove clone --- performance/runner/src/calculate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 2609e70a8fd..324f33f2762 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -145,7 +145,7 @@ fn calculate_regressions( let calculations: Vec = sorted_measurement_groups .iter() - .group_by(|x| x.run.clone()) + .group_by(|x| &x.run) .into_iter() .map(|(_, g)| { let mut groups: Vec<&MeasurementGroup> = g.collect(); From 2d3d1b030a895654d8d634f2b388159d2c557135 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 09:47:34 -0400 Subject: [PATCH 66/81] rename to dummy project --- .../dbt_project.yml | 0 .../models/path_0/node_0.sql | 0 .../models/path_0/node_0.yml | 0 .../models/path_0/node_1.sql | 0 .../models/path_0/node_1.yml | 0 .../models/path_0/node_2.sql | 0 .../models/path_0/node_2.yml | 0 .../dbt_project.yml | 0 .../models/path_0/node_0.sql | 0 .../models/path_0/node_0.yml | 0 .../models/path_0/node_1.sql | 0 .../models/path_0/node_1.yml | 0 .../models/path_0/node_2.sql | 0 .../models/path_0/node_2.yml | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/dbt_project.yml (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_0.sql (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_0.yml (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_1.sql (100%) rename performance/projects/{01_mini_project_dont_merge 
=> 01_dummy_project}/models/path_0/node_1.yml (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_2.sql (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_2.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/dbt_project.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_0.sql (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_0.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_1.sql (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_1.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_2.sql (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_2.yml (100%) diff --git a/performance/projects/01_mini_project_dont_merge/dbt_project.yml b/performance/projects/01_dummy_project/dbt_project.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/dbt_project.yml rename to performance/projects/01_dummy_project/dbt_project.yml diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/01_dummy_project/models/path_0/node_0.sql similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql rename to performance/projects/01_dummy_project/models/path_0/node_0.sql diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/01_dummy_project/models/path_0/node_0.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml rename to performance/projects/01_dummy_project/models/path_0/node_0.yml diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/01_dummy_project/models/path_0/node_1.sql similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql rename to performance/projects/01_dummy_project/models/path_0/node_1.sql diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/01_dummy_project/models/path_0/node_1.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml rename to performance/projects/01_dummy_project/models/path_0/node_1.yml diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/01_dummy_project/models/path_0/node_2.sql similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql rename to performance/projects/01_dummy_project/models/path_0/node_2.sql diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/01_dummy_project/models/path_0/node_2.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml rename to performance/projects/01_dummy_project/models/path_0/node_2.yml diff --git a/performance/projects/02_mini_project_dont_merge/dbt_project.yml b/performance/projects/02_dummy_project/dbt_project.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/dbt_project.yml rename to 
performance/projects/02_dummy_project/dbt_project.yml diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/02_dummy_project/models/path_0/node_0.sql similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql rename to performance/projects/02_dummy_project/models/path_0/node_0.sql diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/02_dummy_project/models/path_0/node_0.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml rename to performance/projects/02_dummy_project/models/path_0/node_0.yml diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/02_dummy_project/models/path_0/node_1.sql similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql rename to performance/projects/02_dummy_project/models/path_0/node_1.sql diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/02_dummy_project/models/path_0/node_1.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml rename to performance/projects/02_dummy_project/models/path_0/node_1.yml diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/02_dummy_project/models/path_0/node_2.sql similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql rename to performance/projects/02_dummy_project/models/path_0/node_2.sql diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/02_dummy_project/models/path_0/node_2.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml rename to performance/projects/02_dummy_project/models/path_0/node_2.yml From 6c8de62b24bc4df5478394d7fd44a6225d934699 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 09:48:33 -0400 Subject: [PATCH 67/81] up stddev threshold --- performance/runner/src/calculate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 324f33f2762..a47db8a91bd 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -49,7 +49,7 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Date: Thu, 5 Aug 2021 11:03:13 -0400 Subject: [PATCH 68/81] add lots of comments --- performance/runner/src/calculate.rs | 36 +++++++++++++++++++++--- performance/runner/src/exceptions.rs | 11 +++++++- performance/runner/src/main.rs | 42 +++++++++++++++++----------- performance/runner/src/measure.rs | 40 ++++++++++---------------- 4 files changed, 83 insertions(+), 46 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index a47db8a91bd..c5fd0e2eb90 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -5,6 +5,10 @@ use std::path::{Path, PathBuf}; use std::fs; use std::fs::DirEntry; + +// This type exactly matches the type of array elements +// from hyperfine's output. Deriving `Serialize` and `Deserialize` +// gives us read and write capabilities via json_serde. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurement { pub command: String, @@ -18,11 +22,18 @@ pub struct Measurement { pub times: Vec, } + +// This type exactly matches the type of hyperfine's output. +// Deriving `Serialize` and `Deserialize` gives us read and +// write capabilities via json_serde. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurements { pub results: Vec, } + +// Output data from a comparison between runs on the baseline +// and dev branches. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Data { pub threshold: f64, @@ -31,6 +42,8 @@ pub struct Data { pub dev: f64, } +// The full output from a comparison between runs on the baseline +// and dev branches. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Calculation { pub metric: String, @@ -38,6 +51,8 @@ pub struct Calculation { pub data: Data, } +// A type to describe which measurement we are working with. This +// information is parsed from the filename of hyperfine's output. #[derive(Debug, Clone, PartialEq)] pub struct MeasurementGroup { pub version: String, @@ -45,6 +60,8 @@ pub struct MeasurementGroup { pub measurement: Measurement, } +// Given two measurements, return all the calculations. Calculations are +// flagged as regressions or not regressions. fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; @@ -76,8 +93,8 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Result, CalculateError> { @@ -112,11 +129,19 @@ fn measurements_from_files( .collect() } -// given a list of filename-measurement pairs, detect any regressions by grouping -// measurements together by filename +// Given a list of filename-measurement pairs, detect any regressions by grouping +// measurements together by filename. fn calculate_regressions( measurements: &[(&PathBuf, &Measurement)], ) -> Result, CalculateError> { + /* + Strategy of this function body: + 1. [Measurement] -> [MeasurementGroup] + 2. Sort the MeasurementGroups + 3. Group the MeasurementGroups by "run" + 4. Call `calculate` with the two resulting Measurements as input + */ + let mut measurement_groups: Vec = measurements .iter() .map(|(p, m)| { @@ -174,6 +199,9 @@ fn calculate_regressions( Ok(calculations) } +// Top-level function. Given a path for the result directory, call the above +// functions to compare and collect calculations. Calculations include both +// metrics that fall within the threshold and regressions. pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 59062c3e3be..73a821d7408 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -6,6 +6,9 @@ use std::path::Path; use thiserror::Error; +// Custom IO Error messages for the IO errors we encounter. +// New constructors should be added to wrap any new IO errors. +// The desired output of these errors is tested below. 
#[derive(Debug, Error)] pub enum IOError { #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] @@ -20,7 +23,10 @@ pub enum IOError { CommandErr(Option), } - +// Custom Error messages for the error states we could encounter +// during calculation that are not prevented at compile time. New +// constructors should be added for any new error situations that +// come up. The desired output of these errors is tested below. #[derive(Debug, Error)] pub enum CalculateError { #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] @@ -38,10 +44,12 @@ pub enum CalculateError { } +// Tests for exceptions #[cfg(test)] mod tests { use super::*; + // Tests the output of io error messages. There should be at least one per enum constructor. #[test] fn test_io_error_messages() { let pairs = vec![ @@ -80,6 +88,7 @@ Originating Exception: None"# } + // Tests the output of calculate error messages. There should be at least one per enum constructor. #[test] fn test_calculate_error_messages() { let pairs = vec![ diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index e02a4f1ce67..bd12e060765 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -12,6 +12,9 @@ use std::path::PathBuf; use std::process::exit; use structopt::StructOpt; + +// This type defines the commandline interface and is generated +// by `derive(StructOpt)` #[derive(Clone, Debug, StructOpt)] #[structopt(name = "performance", about = "performance regression testing runner")] enum Opt { @@ -31,27 +34,29 @@ enum Opt { }, } -fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> B { - match r { - Err(e) => { - println!("{}", e); - exit(1) - }, - Ok(x) => f(x), - } +// Given a result with a displayable error, map the value if there is one, or +// print the human readable error message and exit with exit code 1. +fn map_or_gracefully_exit(f: &dyn Fn(A) -> B, r: Result) -> B { + r.map_or_else(|e| { println!("{}", e); exit(1) }, f) } // This is where all the printing, exiting, and error displaying // should happen. Module functions should only return values. fn main() { + // match what the user inputs from the cli match Opt::from_args() { + // measure subcommand Opt::Measure { projects_dir, branch_name } => { - let statuses = gracefully_exit_or( + // get all the statuses of the hyperfine runs or + // gracefully show the user an exception + let statuses = map_or_gracefully_exit( &|x| x, measure::measure(&projects_dir, &branch_name) ); + // only exit with exit code 0 if all hyperfine runs were + // dispatched correctly. for status in statuses { match status.code() { None => (), @@ -65,28 +70,31 @@ fn main() { } }, + // calculate subcommand Opt::Calculate { results_dir } => { - let calculations = gracefully_exit_or( + // get all the calculations or gracefully show the user an exception + let calculations = map_or_gracefully_exit( &|x| x, calculate::regressions(&results_dir) ); - // print calculations to stdout + // print all calculations to stdout so they can be easily debugged + // via CI.
println!(":: All Calculations ::\n"); for c in &calculations { println!("{:#?}\n", c); } println!(""); - // write calculations to file so it can be downloaded as an artifact + // indented json string representation of the calculations array let json_calcs = serde_json::to_string_pretty(&calculations) - .expect("failed to serialize calculations to json"); + .expect("Main: Failed to serialize calculations to json"); + // create the empty destination file, and write the json string let outfile = &mut results_dir.into_os_string(); outfile.push("/final_calculations.json"); - - let mut f = File::create(outfile).expect("Unable to create file"); - f.write_all(json_calcs.as_bytes()).expect("unable to write data"); + let mut f = File::create(outfile).expect("Main: Unable to create file"); + f.write_all(json_calcs.as_bytes()).expect("Main: Unable to write data"); // filter for regressions let regressions: Vec<&Calculation> = calculations @@ -98,6 +106,8 @@ fn main() { match regressions[..] { [] => println!("congrats! no regressions :)"), _ => { + // print all calculations to stdout so they can be easily debugged + // via CI. println!(":: Regressions Found ::\n"); println!(""); for r in regressions { diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index f2cce2d3156..94b68d2581d 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,30 +1,11 @@ -extern crate structopt; - use crate::exceptions::IOError; use std::path::PathBuf; use std::process::{Command, ExitStatus}; use std::fs; -use structopt::StructOpt; -#[derive(Clone, Debug, StructOpt)] -#[structopt(name = "performance", about = "performance regression testing runner")] -enum Performance { - #[structopt(name = "measure")] - Measure { - #[structopt(parse(from_os_str))] - #[structopt(short)] - projects_dir: PathBuf, - #[structopt(short)] - branch_name: bool, - }, - #[structopt(name = "compare")] - Compare { - #[structopt(parse(from_os_str))] - #[structopt(short)] - results_dir: PathBuf, - }, -} +// `Metric` defines a dbt command that we want to measure on both the +// baseline and dev branches. #[derive(Debug, Clone)] struct Metric<'a> { name: &'a str, @@ -33,15 +14,23 @@ struct Metric<'a> { } impl Metric<'_> { + // Returns the proper filename for the hyperfine output for this metric. fn outfile(&self, project: &str, branch: &str) -> String { [branch, "_", self.name, "_", project, ".json"].join("") } } -// calls hyperfine via system command, and returns result of runs +// Calls hyperfine via system command, and returns all the exit codes for each hyperfine run. pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { - // to add a new metric to the test suite, simply define it in this list: - // TODO read from some config file? + /* + Strategy of this function body: + 1. Read all directory names in `projects_directory` + 2. Pair `n` projects with `m` metrics for a total of n*m pairs + 3. Run hyperfine on each project-metric pair + */ + + // To add a new metric to the test suite, simply define it in this list: + // TODO: This could be read from a config file in a future version. 
let metrics: Vec = vec![ Metric { name: "parse", @@ -89,7 +78,8 @@ pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result Date: Thu, 5 Aug 2021 11:10:07 -0400 Subject: [PATCH 69/81] fmt --- performance/runner/src/calculate.rs | 35 +++++++------- performance/runner/src/exceptions.rs | 71 +++++++++++++++------------- performance/runner/src/main.rs | 46 +++++++++--------- performance/runner/src/measure.rs | 42 ++++++++-------- 4 files changed, 101 insertions(+), 93 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index c5fd0e2eb90..055cb11be76 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -1,10 +1,9 @@ use crate::exceptions::{CalculateError, IOError}; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::path::{Path, PathBuf}; use std::fs; use std::fs::DirEntry; - +use std::path::{Path, PathBuf}; // This type exactly matches the type of array elements // from hyperfine's output. Deriving `Serialize` and `Deserialize` @@ -22,8 +21,7 @@ pub struct Measurement { pub times: Vec, } - -// This type exactly matches the type of hyperfine's output. +// This type exactly matches the type of hyperfine's output. // Deriving `Serialize` and `Deserialize` gives us read and // write capabilities via json_serde. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -31,7 +29,6 @@ pub struct Measurements { pub results: Vec, } - // Output data from a comparison between runs on the baseline // and dev branches. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -78,7 +75,7 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Vec, CalculateError>>()? .iter() .filter(|path| { - path - .extension() + path.extension() .and_then(|ext| ext.to_str()) .map_or(false, |ext| ext.ends_with("json")) }) @@ -134,7 +131,7 @@ fn measurements_from_files( fn calculate_regressions( measurements: &[(&PathBuf, &Measurement)], ) -> Result, CalculateError> { - /* + /* Strategy of this function body: 1. [Measurement] -> [MeasurementGroup] 2. Sort the MeasurementGroups @@ -180,17 +177,20 @@ fn calculate_regressions( 2 => { let dev = &groups[1]; let baseline = &groups[0]; - + if dev.version == "dev" && baseline.version == "baseline" { Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement)) } else { - Err(CalculateError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) + Err(CalculateError::BadBranchNameErr( + baseline.version.clone(), + dev.version.clone(), + )) } - }, + } i => { let gs: Vec = groups.into_iter().map(|x| x.clone()).collect(); Err(CalculateError::BadGroupSizeErr(i, gs)) - }, + } } }) .collect::>, CalculateError>>()? @@ -210,7 +210,10 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Calc // we expect two runs for each project-metric pairing: one for each branch, baseline // and dev. An odd result count is unexpected. 
} else if v.len() % 2 == 1 { - Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone())) + Err(CalculateError::OddResultsCountErr( + v.len(), + results_directory.clone(), + )) } else { // otherwise, we can do our comparisons let measurements = v diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 73a821d7408..f278da6c365 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -1,11 +1,10 @@ use crate::calculate::*; -use std::path::PathBuf; use std::io; #[cfg(test)] use std::path::Path; +use std::path::PathBuf; use thiserror::Error; - // Custom IO Error messages for the IO errors we encounter. // New constructors should be added to wrap any new IO errors. // The desired output of these errors is tested below. @@ -43,7 +42,6 @@ pub enum CalculateError { BadBranchNameErr(String, String), } - // Tests for exceptions #[cfg(test)] mod tests { @@ -57,28 +55,31 @@ mod tests { IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"ReadErr: The file cannot be read. Filepath: dummy/path/file.json -Originating Exception: None"# +Originating Exception: None"#, ), ( IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), r#"MissingFilenameErr: The path provided does not specify a file. -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( IOError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( - IOError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), + IOError::BadFileContentsErr( + Path::new("dummy/path/filenotexist.json").to_path_buf(), + None, + ), r#"BadFileContentsErr: Check that the file exists and is readable. Filepath: dummy/path/filenotexist.json -Originating Exception: None"# +Originating Exception: None"#, ), ( IOError::CommandErr(None), r#"CommandErr: System command failed to run. -Originating Exception: None"# +Originating Exception: None"#, ), ]; @@ -87,7 +88,6 @@ Originating Exception: None"# } } - // Tests the output fo calculate error messages. There should be at least one per enum constructor. #[test] fn test_calculate_error_messages() { @@ -96,49 +96,55 @@ Originating Exception: None"# CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception: None"# +Originating Exception: None"#, ), ( CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception: None"# +Originating Exception: None"#, ), ( CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), r#"NoResultsErr: The results directory has no json files in it. -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( - CalculateError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), + CalculateError::OddResultsCountErr( + 3, + Path::new("dummy/path/no_file/").to_path_buf(), + ), r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. 
File Count: 3 -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( - CalculateError::BadGroupSizeErr(1, vec![MeasurementGroup { - version: "dev".to_owned(), - run: "some command".to_owned(), - measurement: Measurement { - command: "some command".to_owned(), - mean: 1.0, - stddev: 1.0, - median: 1.0, - user: 1.0, - system: 1.0, - min: 1.0, - max: 1.0, - times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], - } - }]), + CalculateError::BadGroupSizeErr( + 1, + vec![MeasurementGroup { + version: "dev".to_owned(), + run: "some command".to_owned(), + measurement: Measurement { + command: "some command".to_owned(), + mean: 1.0, + stddev: 1.0, + median: 1.0, + user: 1.0, + system: 1.0, + min: 1.0, + max: 1.0, + times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], + }, + }], + ), r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. Count: 1 -Group: [("dev", "some command")]"# +Group: [("dev", "some command")]"#, ), ( CalculateError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. -Found: boop, noop"# +Found: boop, noop"#, ), ]; @@ -146,5 +152,4 @@ Found: boop, noop"# assert_eq!(format!("{}", err), msg) } } - } diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index bd12e060765..eb3ad01f0a1 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -12,7 +12,6 @@ use std::path::PathBuf; use std::process::exit; use structopt::StructOpt; - // This type defines the commandline interface and is generated // by `derive(StructOpt)` #[derive(Clone, Debug, StructOpt)] @@ -37,23 +36,29 @@ enum Opt { // Given a result with a displayable error, map the value if there is one, or // print the human readable error message and exit with exit code 1. fn map_or_gracefully_exit(f: &dyn Fn(A) -> B, r: Result) -> B { - r.map_or_else(|e| { println!("{}", e); exit(1) }, f) + r.map_or_else( + |e| { + println!("{}", e); + exit(1) + }, + f, + ) } -// This is where all the printing, exiting, and error displaying +// This is where all the printing, exiting, and error displaying // should happen. Module functions should only return values. fn main() { // match what the user inputs from the cli match Opt::from_args() { - // measure subcommand - Opt::Measure { projects_dir, branch_name } => { + Opt::Measure { + projects_dir, + branch_name, + } => { // get all the statuses of the hyperfine runs or // gracefully show the user an exception - let statuses = map_or_gracefully_exit( - &|x| x, - measure::measure(&projects_dir, &branch_name) - ); + let statuses = + map_or_gracefully_exit(&|x| x, measure::measure(&projects_dir, &branch_name)); // only exit with exit code 0 if all hyperfine runs were // dispatched correctly. @@ -65,18 +70,15 @@ fn main() { Some(nonzero) => { println!("a child process exited with a nonzero status code."); exit(nonzero) - }, + } } } - }, + } // calculate subcommand Opt::Calculate { results_dir } => { // get all the calculations or gracefully show the user an exception - let calculations = map_or_gracefully_exit( - &|x| x, - calculate::regressions(&results_dir) - ); + let calculations = map_or_gracefully_exit(&|x| x, calculate::regressions(&results_dir)); // print all calculations to stdout so they can be easily debugged // via CI. 
@@ -89,18 +91,17 @@ fn main() { // indented json string representation of the calculations array let json_calcs = serde_json::to_string_pretty(&calculations) .expect("Main: Failed to serialize calculations to json"); - + // create the empty destination file, and write the json string let outfile = &mut results_dir.into_os_string(); outfile.push("/final_calculations.json"); let mut f = File::create(outfile).expect("Main: Unable to create file"); - f.write_all(json_calcs.as_bytes()).expect("Main: Unable to write data"); + f.write_all(json_calcs.as_bytes()) + .expect("Main: Unable to write data"); // filter for regressions - let regressions: Vec<&Calculation> = calculations - .iter() - .filter(|c| c.regression) - .collect(); + let regressions: Vec<&Calculation> = + calculations.iter().filter(|c| c.regression).collect(); // exit with non zero exit code if there are regressions match regressions[..] { @@ -117,7 +118,6 @@ fn main() { exit(1) } } - }, - + } } } diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 94b68d2581d..35df2b997c7 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,8 +1,7 @@ use crate::exceptions::IOError; +use std::fs; use std::path::PathBuf; use std::process::{Command, ExitStatus}; -use std::fs; - // `Metric` defines a dbt command that we want to measure on both the // baseline and dev branches. @@ -21,24 +20,25 @@ impl Metric<'_> { } // Calls hyperfine via system command, and returns all the exit codes for each hyperfine run. -pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { - /* +pub fn measure<'a>( + projects_directory: &PathBuf, + dbt_branch: &str, +) -> Result, IOError> { + /* Strategy of this function body: 1. Read all directory names in `projects_directory` 2. Pair `n` projects with `m` metrics for a total of n*m pairs 3. Run hyperfine on each project-metric pair */ - + // To add a new metric to the test suite, simply define it in this list: // TODO: This could be read from a config file in a future version. - let metrics: Vec = vec![ - Metric { - name: "parse", - prepare: "rm -rf target/", - cmd: "dbt parse --no-version-check", - }, - ]; - + let metrics: Vec = vec![Metric { + name: "parse", + prepare: "rm -rf target/", + cmd: "dbt parse --no-version-check", + }]; + fs::read_dir(projects_directory) .or_else(|e| Err(IOError::ReadErr(projects_directory.to_path_buf(), Some(e))))? 
.map(|entry| { @@ -49,9 +49,12 @@ pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result(projects_directory: &PathBuf, dbt_branch: &str) -> Result Date: Thu, 5 Aug 2021 11:14:30 -0400 Subject: [PATCH 70/81] add fmt check --- .github/workflows/performance.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 94d111f5b9e..85a5db34a24 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -15,6 +15,23 @@ on: jobs: + # checks fmt of runner code + fmt: + name: Cargo fmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + # runs any tests associated with the runner # these tests make sure the runner logic is correct test-runner: From 5f9ed1a83c4d236c6f66ed22d498072b27639122 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:15:40 -0400 Subject: [PATCH 71/81] run twice a day --- .github/workflows/performance.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 85a5db34a24..79a1efc9980 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -8,8 +8,8 @@ on: - 'develop' - 'performance-regression-testing' schedule: - # runs every day at 10:05pm - - cron: '5 22 * * *' + # runs twice a day at 10:05am and 10:05pm + - cron: '5 10,22 * * *' # Allows you to run this workflow manually from the Actions tab workflow_dispatch: From 3e1e171c6645b9c4ee3926a6fb00f90b98f19b5f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:31:00 -0400 Subject: [PATCH 72/81] add test for regression calculation --- performance/runner/src/calculate.rs | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 055cb11be76..bd22eacf03b 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -226,3 +226,47 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Calc } }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detects_5_percent_regression() { + let dev = Measurement { + command: "some command".to_owned(), + mean: 1.06, + stddev: 1.06, + median: 1.06, + user: 1.06, + system: 1.06, + min: 1.06, + max: 1.06, + times: vec![], + }; + + let baseline = Measurement { + command: "some command".to_owned(), + mean: 1.00, + stddev: 1.00, + median: 1.00, + user: 1.00, + system: 1.00, + min: 1.00, + max: 1.00, + times: vec![], + }; + + let calculations = calculate("test_metric", &dev, &baseline); + let regressions: Vec<&Calculation> = calculations + .iter() + .filter(|calc| calc.regression) + .collect(); + + // expect one regression for median + println!("{:#?}", regressions); + assert_eq!(regressions.len(), 1); + assert_eq!(regressions[0].metric, "median_test_metric"); + } + +} \ No newline at end of file From 2068dd5510c7c2f545c3edc5d5baa1d6d2c9b74e Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:31:19 -0400 Subject: [PATCH 73/81] fmt --- performance/runner/src/calculate.rs | 41 +++++++++++++---------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/performance/runner/src/calculate.rs 
b/performance/runner/src/calculate.rs index bd22eacf03b..aff405d109b 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -235,38 +235,35 @@ mod tests { fn detects_5_percent_regression() { let dev = Measurement { command: "some command".to_owned(), - mean: 1.06, - stddev: 1.06, - median: 1.06, - user: 1.06, - system: 1.06, - min: 1.06, - max: 1.06, - times: vec![], + mean: 1.06, + stddev: 1.06, + median: 1.06, + user: 1.06, + system: 1.06, + min: 1.06, + max: 1.06, + times: vec![], }; let baseline = Measurement { command: "some command".to_owned(), - mean: 1.00, - stddev: 1.00, - median: 1.00, - user: 1.00, - system: 1.00, - min: 1.00, - max: 1.00, - times: vec![], + mean: 1.00, + stddev: 1.00, + median: 1.00, + user: 1.00, + system: 1.00, + min: 1.00, + max: 1.00, + times: vec![], }; let calculations = calculate("test_metric", &dev, &baseline); - let regressions: Vec<&Calculation> = calculations - .iter() - .filter(|calc| calc.regression) - .collect(); + let regressions: Vec<&Calculation> = + calculations.iter().filter(|calc| calc.regression).collect(); // expect one regression for median println!("{:#?}", regressions); assert_eq!(regressions.len(), 1); assert_eq!(regressions[0].metric, "median_test_metric"); } - -} \ No newline at end of file +} From ef63319733ce4472969ba6a24004d7136fdb184c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:39:19 -0400 Subject: [PATCH 74/81] add comment --- .github/workflows/performance.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 79a1efc9980..0c664011c87 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -16,6 +16,8 @@ on: jobs: # checks fmt of runner code + # purposefully not a dependency of any other job + # will block merging, but not prevent developing fmt: name: Cargo fmt runs-on: ubuntu-latest From 87072707edab989b8d8ca0d35ff3f834a58684c6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:40:48 -0400 Subject: [PATCH 75/81] point to manifest --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 0c664011c87..7b72d85797e 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -32,7 +32,7 @@ jobs: - uses: actions-rs/cargo@v1 with: command: fmt - args: --all -- --check + args: --manifest-path performance/runner/Cargo.toml --all -- --check # runs any tests associated with the runner # these tests make sure the runner logic is correct From 06cc0c57e815b0027ab27f5d667a833b75f0aa90 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 12:27:21 -0400 Subject: [PATCH 76/81] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7159b16983..109aea86258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Handle whitespace after a plus sign on the project config ([#3526](https://github.com/dbt-labs/dbt/pull/3526)) ### Under the hood +- Add performance regression testing [#3602](https://github.com/dbt-labs/dbt/pull/3602) - Improve default view and table materialization performance by checking relational cache before attempting to drop temp relations ([#3112](https://github.com/fishtown-analytics/dbt/issues/3112), [#3468](https://github.com/fishtown-analytics/dbt/pull/3468)) - Add optional `sslcert`, `sslkey`, and 
`sslrootcert` profile arguments to the Postgres connector. ([#3472](https://github.com/fishtown-analytics/dbt/pull/3472), [#3473](https://github.com/fishtown-analytics/dbt/pull/3473)) - Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536)) From b90b3a9c196c812325584b525a4922a05ef7afe8 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Fri, 6 Aug 2021 14:46:10 -0400 Subject: [PATCH 77/81] avoid abandoned destructors by refactoring usage of exit --- performance/runner/src/main.rs | 81 +++++++++++++++++----------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index eb3ad01f0a1..856f6b01682 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -5,11 +5,10 @@ mod exceptions; mod measure; use crate::calculate::Calculation; -use std::fmt::Display; +use crate::exceptions::CalculateError; use std::fs::File; use std::io::Write; use std::path::PathBuf; -use std::process::exit; use structopt::StructOpt; // This type defines the commandline interface and is generated @@ -33,21 +32,12 @@ enum Opt { }, } -// Given a result with a displayable error, map the value if there is one, or -// print the human readable error message and exit with exit code 1. -fn map_or_gracefully_exit(f: &dyn Fn(A) -> B, r: Result) -> B { - r.map_or_else( - |e| { - println!("{}", e); - exit(1) - }, - f, - ) -} - -// This is where all the printing, exiting, and error displaying -// should happen. Module functions should only return values. -fn main() { +// enables proper usage of exit() in main. +// https://doc.rust-lang.org/std/process/fn.exit.html#examples +// +// This is where all the printing should happen. Exiting happens +// in main, and module functions should only return values. +fn run_app() -> Result { // match what the user inputs from the cli match Opt::from_args() { // measure subcommand @@ -55,30 +45,26 @@ fn main() { projects_dir, branch_name, } => { - // get all the statuses of the hyperfine runs or - // gracefully show the user an exception - let statuses = - map_or_gracefully_exit(&|x| x, measure::measure(&projects_dir, &branch_name)); - - // only exit with exit code 0 if all hyperfine runs were - // dispatched correctly. - for status in statuses { - match status.code() { - None => (), - Some(0) => (), - // if any child command exited with a non zero status code, exit with the same one here. - Some(nonzero) => { - println!("a child process exited with a nonzero status code."); - exit(nonzero) - } - } - } + // if there are any nonzero exit codes from the hyperfine runs, + // return the first one. otherwise return zero. + measure::measure(&projects_dir, &branch_name) + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?
+ .iter() + .map(|status| status.code()) + .flatten() + .filter(|code| *code != 0) + .collect::>() + .get(0) + .map_or(Ok(0), |x| { + println!("Main: a child process exited with a nonzero status code."); + Ok(*x) + }) } // calculate subcommand Opt::Calculate { results_dir } => { // get all the calculations or gracefully show the user an exception - let calculations = map_or_gracefully_exit(&|x| x, calculate::regressions(&results_dir)); + let calculations = calculate::regressions(&results_dir)?; // print all calculations to stdout so they can be easily debugged // via CI. @@ -103,21 +89,34 @@ fn main() { let regressions: Vec<&Calculation> = calculations.iter().filter(|c| c.regression).collect(); - // exit with non zero exit code if there are regressions + // return a non-zero exit code if there are regressions match regressions[..] { - [] => println!("congrats! no regressions :)"), + [] => { + println!("congrats! no regressions :)"); + Ok(0) + } _ => { - // print all calculations to stdout so they can be easily debugged - // via CI. + // print all calculations to stdout so they can be easily + // debugged via CI. println!(":: Regressions Found ::\n"); println!(""); for r in regressions { println!("{:#?}\n", r); } println!(""); - exit(1) + Ok(1) } } } } } + +fn main() { + std::process::exit(match run_app() { + Ok(code) => code, + Err(err) => { + eprintln!("{}", err); + 1 + } + }); +} From cd6894acf49063adbd1d82c12d585f5aba5f09ae Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:52:53 -0400 Subject: [PATCH 78/81] add to future work --- performance/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/performance/README.md b/performance/README.md index 341e9da4333..a2ae7ef7fe3 100644 --- a/performance/README.md +++ b/performance/README.md @@ -14,3 +14,4 @@ In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Githu - add more dbt commands to measure - possibly using the uploaded json artifacts to store these results so they can be graphed over time - reading new metrics from a file so no one has to edit rust source to add them to the suite +- instead of building the rust every time, we could publish pull down the latest deployed version. \ No newline at end of file From 355b0c496ea1264d886e4fd2b3927d8f8728dc56 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:53:07 -0400 Subject: [PATCH 79/81] remove unnecessary newline printing --- performance/runner/src/main.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 856f6b01682..28392d11c8d 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -72,7 +72,6 @@ fn run_app() -> Result { for c in &calculations { println!("{:#?}\n", c); } - println!(""); // indented json string representation of the calculations array let json_calcs = serde_json::to_string_pretty(&calculations) @@ -99,11 +98,9 @@ fn run_app() -> Result { // print all calculations to stdout so they can be easily // debugged via CI. 
println!(":: Regressions Found ::\n"); - println!(""); for r in regressions { println!("{:#?}\n", r); } - println!(""); Ok(1) } } From 8609c023832dfbd7a5cb104501b050c2ac4c3458 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:53:53 -0400 Subject: [PATCH 80/81] minor change --- performance/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/README.md b/performance/README.md index a2ae7ef7fe3..83a706686f4 100644 --- a/performance/README.md +++ b/performance/README.md @@ -14,4 +14,4 @@ In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Githu - add more dbt commands to measure - possibly using the uploaded json artifacts to store these results so they can be graphed over time - reading new metrics from a file so no one has to edit rust source to add them to the suite -- instead of building the rust every time, we could publish pull down the latest deployed version. \ No newline at end of file +- instead of building the rust every time, we could publish and pull down the latest version. From 1fe53750faa8fd5d2eb10d59e7ec27a9f569aaa5 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:55:42 -0400 Subject: [PATCH 81/81] add more future work --- performance/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/performance/README.md b/performance/README.md index 83a706686f4..71aba30b989 100644 --- a/performance/README.md +++ b/performance/README.md @@ -15,3 +15,4 @@ In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Githu - possibly using the uploaded json artifacts to store these results so they can be graphed over time - reading new metrics from a file so no one has to edit rust source to add them to the suite - instead of building the rust every time, we could publish and pull down the latest version. +- instead of manually setting the baseline version of dbt to test, pull down the latest stable version as the baseline.