From 9f62ec215344e7aa09327d4cd3328381b898092e Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 20 Jul 2021 09:28:57 -0400 Subject: [PATCH 01/81] add rust performance runner --- performance/README.md | 13 +++++++ performance/results/.gitignore | 5 +++ performance/runner/.gitignore | 2 + performance/runner/Cargo.lock | 7 ++++ performance/runner/Cargo.toml | 8 ++++ performance/runner/src/main.rs | 70 ++++++++++++++++++++++++++++++++++ 6 files changed, 105 insertions(+) create mode 100644 performance/README.md create mode 100644 performance/results/.gitignore create mode 100644 performance/runner/.gitignore create mode 100644 performance/runner/Cargo.lock create mode 100644 performance/runner/Cargo.toml create mode 100644 performance/runner/src/main.rs diff --git a/performance/README.md b/performance/README.md new file mode 100644 index 00000000000..8b9ce3058ab --- /dev/null +++ b/performance/README.md @@ -0,0 +1,13 @@ +# Performance Testing +This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually. + +## Adding a new project +Just make a new directory under projects. It will automatically be picked up by the tests. + +## Adding a new dbt command +In `runner/src/main.rs` add a metric to the `metrics` Vec in the main function. The Github Action will handle recompilation. + +## Future work +- add more projects to test different configurations that have been known bottlenecks +- add more dbt commands to measure +- consider storing these results so they can be graphed over time diff --git a/performance/results/.gitignore b/performance/results/.gitignore new file mode 100644 index 00000000000..b8e28556b4b --- /dev/null +++ b/performance/results/.gitignore @@ -0,0 +1,5 @@ +# all files here are generated results +* + +# except this one +!.gitignore diff --git a/performance/runner/.gitignore b/performance/runner/.gitignore new file mode 100644 index 00000000000..054a2acc8a4 --- /dev/null +++ b/performance/runner/.gitignore @@ -0,0 +1,2 @@ +target/ +projects/*/logs diff --git a/performance/runner/Cargo.lock b/performance/runner/Cargo.lock new file mode 100644 index 00000000000..0832b7312b5 --- /dev/null +++ b/performance/runner/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
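A note on the README's "Adding a new dbt command" instruction above: it corresponds to a one-line addition to the `metrics` Vec in `runner/src/main.rs` (shown in full further below). As an illustrative sketch only, a hypothetical `compile` metric added next to the existing `parse` entry might look like this; the `compile` entry is not part of this patch.

    let metrics: Vec<Metric> = vec![
        Metric { name: "parse", prepare: "rm -rf target/", cmd: "dbt parse --no-version-check" },
        // hypothetical second command to measure, added per the README instructions:
        Metric { name: "compile", prepare: "rm -rf target/", cmd: "dbt compile --no-version-check" },
    ];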
+version = 3 + +[[package]] +name = "runner" +version = "0.1.0" diff --git a/performance/runner/Cargo.toml b/performance/runner/Cargo.toml new file mode 100644 index 00000000000..77ff9695b77 --- /dev/null +++ b/performance/runner/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "runner" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs new file mode 100644 index 00000000000..31420ffe923 --- /dev/null +++ b/performance/runner/src/main.rs @@ -0,0 +1,70 @@ +use std::{env, fs, io}; +use std::process::{Command, ExitStatus, exit}; + + +#[derive(Debug, Clone)] +struct Metric<'a> { + name: &'a str, + prepare: &'a str, + cmd: &'a str, +} + +impl Metric<'_> { + fn outfile(&self, project: &str) -> String { + [self.name, "_", project, ".json"].join("") + } +} + +fn main() { + // TODO args lib + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("please provide the git project directory root"); + exit(1); + } + let git_directory = &args[1]; + + // to add a new metric to the test suite, simply define it in this list: + let metrics: Vec = vec![ + Metric { name:"parse", prepare: "rm -rf target/", cmd: "dbt parse --no-version-check" }, + ]; + + // list out all projects + let project_entries = fs::read_dir([&git_directory, "performance/projects"].join("")).unwrap(); + + let results: Result, io::Error > = project_entries.map(|entry| { + metrics.clone().into_iter().map(|metric| { + let path = entry.as_ref().unwrap().path(); + let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); + + Command::new("hyperfine") + .current_dir(&path) + .arg("--prepare") + .arg(metric.prepare) + .arg(metric.cmd) + .arg("--export-json") + .arg([&git_directory, "/performance/results/", &metric.outfile(project_name)].join("")) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. + .arg("--show-output") + .status() // use spawn() here instead for more information + }).collect::>>() + }).flatten().collect(); + + // only exit with status code 0 if everything ran as expected + match results { + // if dispatch of any of the commands failed, panic with that error + Err(e) => panic!("{}", e), + Ok(statuses) => { + for status in statuses { + match status.code() { + None => (), + Some(0) => (), + // if any child command exited with a non zero status code, exit with the same one here. 
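Worth noting for readers new to Rust: the `results` value above relies on standard-library behavior where an iterator of `Result`s can be collected into a single `Result<Vec<_>, _>` that short-circuits at the first `Err`. A self-contained sketch of that behavior, not part of the patch; a test like this could sit alongside the runner code and be picked up by `cargo test`:

    #[test]
    fn collecting_results_short_circuits() {
        // every Ok value is gathered into one Ok(Vec<_>) ...
        let all_ok: Result<Vec<i32>, String> = vec![Ok(1), Ok(2)].into_iter().collect();
        assert_eq!(all_ok, Ok(vec![1, 2]));

        // ... but a single Err makes the whole collection that Err.
        let has_err: Result<Vec<i32>, String> =
            vec![Ok(1), Err("boom".to_string()), Ok(3)].into_iter().collect();
        assert_eq!(has_err, Err("boom".to_string()));
    }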
+ Some(nonzero) => exit(nonzero), + } + } + // everything ran as expected + exit(0); + }, + } +} From 9e3da391a787f42e2460bc2350a4c9970a59d6a3 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 20 Jul 2021 14:02:10 -0400 Subject: [PATCH 02/81] draft of regression testing workflow --- .github/workflows/performance.yml | 140 ++++++++++++++++++ .../dbt_project.yml | 38 +++++ .../models/path_0/node_0.sql | 1 + .../models/path_0/node_0.yml | 11 ++ .../models/path_0/node_1.sql | 3 + .../models/path_0/node_1.yml | 11 ++ .../models/path_0/node_2.sql | 3 + .../models/path_0/node_2.yml | 11 ++ .../dbt_project.yml | 38 +++++ .../models/path_0/node_0.sql | 1 + .../models/path_0/node_0.yml | 11 ++ .../models/path_0/node_1.sql | 3 + .../models/path_0/node_1.yml | 11 ++ .../models/path_0/node_2.sql | 3 + .../models/path_0/node_2.yml | 11 ++ performance/runner/src/main.rs | 10 +- 16 files changed, 302 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/performance.yml create mode 100644 performance/projects/01_mini_project_dont_merge/dbt_project.yml create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql create mode 100644 performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml create mode 100644 performance/projects/02_mini_project_dont_merge/dbt_project.yml create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql create mode 100644 performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml new file mode 100644 index 00000000000..5053f8ad9a5 --- /dev/null +++ b/.github/workflows/performance.yml @@ -0,0 +1,140 @@ + +# Schedule triggers +on: + # TODO this is just while developing + pull_request: + branches: + - 'develop' + - 'performance-regression-testing' + schedule: + # runs every day at 10:05pm + - cron: '5 22 * * *' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + + # runs any tests associated with the runner + # these tests make sure the runner logic is correct + # if there are no tests, it still quickly builds the project to fail fast + test-rust: + name: Test Runner + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + args: --manifest-path performance/runner/Cargo.toml + + # build an optimized binary to be used as the runner in later steps + build-rust: + needs: [test-rust] + name: Build Runner + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-D warnings" + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: 
stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: build + args: --release --manifest-path performance/runner/Cargo.toml + - uses: actions/upload-artifact@v2 + with: + name: runner + path: performance/runner/target/release/runner + + # run the performance measurements on the current or default branch + measure-dev: + needs: [build-rust] + name: Measure Dev Branch + runs-on: ubuntu-latest + steps: + - name: checkout dev + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + - name: install dbt + run: pip install -r dev-requirements.txt -r editable-requirements.txt + - name: install hyperfine + run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb + - uses: actions/download-artifact@v2 + with: + name: runner + - name: change permissions + run: chmod +x ./runner + - name: run + run: ./runner ${{ github.workspace }}/performance/projects/ + - uses: actions/upload-artifact@v2 + with: + name: dev-results + path: performance/results/ + + # run the performance measurements on the latest release branch + measure-latest: + needs: [build-rust] + name: Measure Latest Branch + runs-on: ubuntu-latest + steps: + - name: checkout latest + uses: actions/checkout@v2 + with: + ref: '0.20.latest' + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + - name: move repo up a level + run: mkdir ${{ github.workspace }}/../latest/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../latest + - name: debug line + run: ls ${{ github.workspace }}/../latest/dbt/ + # installation creates egg-links so we have to preserve source + - name: install dbt from new location + run: cd ${{ github.workspace }}/../latest/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt + # checkout the current branch to get all the target projects + # this deletes the old checked out code which is why we had to copy before + - name: checkout dev + uses: actions/checkout@v2 + - name: install hyperfine + run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb + - uses: actions/download-artifact@v2 + with: + name: runner + - name: change permissions + run: chmod +x ./runner + - name: run runner + run: ./runner ${{ github.workspace }}/performance/projects/ + - uses: actions/upload-artifact@v2 + with: + name: latest-results + path: performance/results/ + + compare-results: + needs: [measure-dev, measure-latest] + name: Compare Results + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v2 + with: + name: dev-results + - name: ls result files + run: ls + - uses: actions/download-artifact@v2 + with: + name: latest-results + - name: ls result files + run: ls + # TODO actually compare diff --git a/performance/projects/01_mini_project_dont_merge/dbt_project.yml b/performance/projects/01_mini_project_dont_merge/dbt_project.yml new file mode 100644 index 00000000000..95e1020def8 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'my_new_package' +version: 1.0.0 +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. 
Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'default' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! + +# In this example config, we tell dbt to build all models in the example/ directory +# as views (the default). These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. +models: + my_new_package: + # Applies to all files under models/example/ + example: + materialized: view diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql new file mode 100644 index 00000000000..26d9cae7b5b --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql @@ -0,0 +1 @@ +select 1 as id \ No newline at end of file diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml new file mode 100644 index 00000000000..282e56882f0 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_0 +version: 2 diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml new file mode 100644 index 00000000000..2899ddf532f --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_1 +version: 2 diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml new file mode 100644 index 00000000000..d2582eabbd8 --- /dev/null +++ 
b/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_2 +version: 2 diff --git a/performance/projects/02_mini_project_dont_merge/dbt_project.yml b/performance/projects/02_mini_project_dont_merge/dbt_project.yml new file mode 100644 index 00000000000..95e1020def8 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'my_new_package' +version: 1.0.0 +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'default' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! + +# In this example config, we tell dbt to build all models in the example/ directory +# as views (the default). These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. 
+models: + my_new_package: + # Applies to all files under models/example/ + example: + materialized: view diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql new file mode 100644 index 00000000000..26d9cae7b5b --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql @@ -0,0 +1 @@ +select 1 as id \ No newline at end of file diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml new file mode 100644 index 00000000000..282e56882f0 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_0 +version: 2 diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml new file mode 100644 index 00000000000..2899ddf532f --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_1 +version: 2 diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql new file mode 100644 index 00000000000..cb5b97f45c1 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql @@ -0,0 +1,3 @@ +select 1 as id +union all +select * from {{ ref('node_0') }} \ No newline at end of file diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml new file mode 100644 index 00000000000..d2582eabbd8 --- /dev/null +++ b/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml @@ -0,0 +1,11 @@ +models: +- columns: + - name: id + tests: + - unique + - not_null + - relationships: + field: id + to: node_0 + name: node_2 +version: 2 diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 31420ffe923..2648be5f061 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,4 +1,5 @@ use std::{env, fs, io}; +use std::path::Path; use std::process::{Command, ExitStatus, exit}; @@ -19,10 +20,10 @@ fn main() { // TODO args lib let args: Vec = env::args().collect(); if args.len() < 2 { - println!("please provide the git project directory root"); + println!("please provide the target projects directory"); exit(1); } - let git_directory = &args[1]; + let projects_directory = &args[1]; // to add a new metric to the test suite, simply define it in this list: let metrics: Vec = vec![ @@ -30,7 +31,8 @@ fn main() { ]; // list out all projects - let project_entries = fs::read_dir([&git_directory, "performance/projects"].join("")).unwrap(); + let path = 
Path::new(&projects_directory); + let project_entries = fs::read_dir(path).unwrap(); let results: Result, io::Error > = project_entries.map(|entry| { metrics.clone().into_iter().map(|metric| { @@ -43,7 +45,7 @@ fn main() { .arg(metric.prepare) .arg(metric.cmd) .arg("--export-json") - .arg([&git_directory, "/performance/results/", &metric.outfile(project_name)].join("")) + .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. .arg("--show-output") .status() // use spawn() here instead for more information From ee58d27d94cf13c6e3e2b8f2bf2bb17327396f5e Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 17:28:28 -0400 Subject: [PATCH 03/81] use a profiles.yml file --- performance/projects/config/profiles.yml | 12 ++++++++++++ performance/runner/src/main.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 performance/projects/config/profiles.yml diff --git a/performance/projects/config/profiles.yml b/performance/projects/config/profiles.yml new file mode 100644 index 00000000000..7b1c39a1f9e --- /dev/null +++ b/performance/projects/config/profiles.yml @@ -0,0 +1,12 @@ +default: + target: dev + outputs: + dev: + type: postgres + host: localhost + user: dummy + password: dummy_password + port: 5432 + dbname: dummy + schema: dummy + threads: 4 \ No newline at end of file diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 2648be5f061..3cb91e991bd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -43,7 +43,7 @@ fn main() { .current_dir(&path) .arg("--prepare") .arg(metric.prepare) - .arg(metric.cmd) + .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/config/"].join("")) .arg("--export-json") .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. From d0773f33460b1c0bff7595c478bceefa4d26419d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 17:34:22 -0400 Subject: [PATCH 04/81] warmup fs caches --- performance/runner/src/main.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 3cb91e991bd..31a61801986 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -41,6 +41,9 @@ fn main() { Command::new("hyperfine") .current_dir(&path) + // warms filesystem caches by running the command first without counting it. 
+ // alternatively we could clear them before each run + .arg("--warmup 1") .arg("--prepare") .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/config/"].join("")) From 4ff3f6d4e84b7cb4c0184da2f207d181a8f51449 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 17:35:54 -0400 Subject: [PATCH 05/81] moved config dir out of projects dir --- performance/{projects/config => project_config}/profiles.yml | 0 performance/runner/src/main.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename performance/{projects/config => project_config}/profiles.yml (100%) diff --git a/performance/projects/config/profiles.yml b/performance/project_config/profiles.yml similarity index 100% rename from performance/projects/config/profiles.yml rename to performance/project_config/profiles.yml diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 31a61801986..eaddd2363cd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -46,7 +46,7 @@ fn main() { .arg("--warmup 1") .arg("--prepare") .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/config/"].join("")) + .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/../project_config/"].join("")) .arg("--export-json") .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. From 75d3d87d64a47f6e7cfb90bda761f6afa7a4eaf4 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 21 Jul 2021 18:00:07 -0400 Subject: [PATCH 06/81] fix warmup syntax --- performance/runner/src/main.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index eaddd2363cd..b57538bdefd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -43,7 +43,8 @@ fn main() { .current_dir(&path) // warms filesystem caches by running the command first without counting it. // alternatively we could clear them before each run - .arg("--warmup 1") + .arg("--warmup") + .arg("1") .arg("--prepare") .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/../project_config/"].join("")) @@ -58,7 +59,7 @@ fn main() { // only exit with status code 0 if everything ran as expected match results { // if dispatch of any of the commands failed, panic with that error - Err(e) => panic!("{}", e), + Err(e) => panic!("{:?}", e), Ok(statuses) => { for status in statuses { match status.code() { From 1d7e8349ede8514218968c800660f14898efd312 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 22 Jul 2021 12:01:30 -0400 Subject: [PATCH 07/81] draft of comparison --- performance/comparison/Cargo.lock | 89 ++++++++++++++++++++++++++++++ performance/comparison/Cargo.toml | 8 +++ performance/comparison/src/main.rs | 53 ++++++++++++++++++ 3 files changed, 150 insertions(+) create mode 100644 performance/comparison/Cargo.lock create mode 100644 performance/comparison/Cargo.toml create mode 100644 performance/comparison/src/main.rs diff --git a/performance/comparison/Cargo.lock b/performance/comparison/Cargo.lock new file mode 100644 index 00000000000..4bf9976c9a0 --- /dev/null +++ b/performance/comparison/Cargo.lock @@ -0,0 +1,89 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
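A brief note on the "fix warmup syntax" commit above: `std::process::Command` does no shell word-splitting, so each `.arg(...)` call becomes exactly one argv entry in the child process. Passing `"--warmup 1"` as a single arg therefore reaches hyperfine as one token instead of a flag followed by its value, which is why the fix splits it into two `.arg` calls. A minimal standalone illustration, not part of the patch:

    use std::process::Command;

    fn main() {
        // one argv entry: the child would receive the single token "--warmup 1",
        // which a CLI parser does not treat as a flag plus a value
        let mut broken = Command::new("hyperfine");
        broken.arg("--warmup 1");

        // two argv entries: the child receives "--warmup" and "1" separately,
        // which is what the commit above switches to
        let mut fixed = Command::new("hyperfine");
        fixed.arg("--warmup").arg("1");

        // neither command is spawned here; this only illustrates how argv is built
        let _ = (broken, fixed);
    }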
+version = 3 + +[[package]] +name = "comparison" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "proc-macro2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" diff --git a/performance/comparison/Cargo.toml b/performance/comparison/Cargo.toml new file mode 100644 index 00000000000..df15857b46e --- /dev/null +++ b/performance/comparison/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "comparison" +version = "0.1.0" +edition = "2018" + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs new file mode 100644 index 00000000000..e3b7335c3cb --- /dev/null +++ b/performance/comparison/src/main.rs @@ -0,0 +1,53 @@ +use serde::{Deserialize, Serialize}; + + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Measurement { + command: String, + mean: f64, + stddev: f64, + median: f64, + user: f64, + system: f64, + min: f64, + max: f64, + times: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Measurements { + results: Vec, +} + +fn main() { + let data = r#"{ +"results": [ + { + "command": "dbt parse --no-version-check", + "mean": 3.050361809595, + "stddev": 0.04615108237908544, + "median": 3.034926999095, + "user": 2.7580889199999996, + "system": 0.22864483500000002, + "min": 3.005364834595, + "max": 3.145752726595, + "times": [ + 3.023607223595, + 3.0423504165949997, + 3.145752726595, + 3.114517602595, + 3.062246401595, + 3.051930277595, + 3.027503581595, + 3.013381462595, 
+ 3.005364834595, + 3.016963568595 + ] + } +] +}"#; + + let measurements: Measurements = serde_json::from_str(data).unwrap(); + + println!("{:?}", measurements); +} \ No newline at end of file From a29367b7fe50e6b87a8eac5b717162e183da67f4 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 22 Jul 2021 12:30:35 -0400 Subject: [PATCH 08/81] read files and parse json --- performance/comparison/src/main.rs | 62 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index e3b7335c3cb..e42d879481c 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -1,5 +1,8 @@ use serde::{Deserialize, Serialize}; - +use serde_json::Result; +use std::path::Path; +use std::process::exit; +use std::{env, fs}; #[derive(Debug, Clone, Serialize, Deserialize)] struct Measurement { @@ -20,34 +23,33 @@ struct Measurements { } fn main() { - let data = r#"{ -"results": [ - { - "command": "dbt parse --no-version-check", - "mean": 3.050361809595, - "stddev": 0.04615108237908544, - "median": 3.034926999095, - "user": 2.7580889199999996, - "system": 0.22864483500000002, - "min": 3.005364834595, - "max": 3.145752726595, - "times": [ - 3.023607223595, - 3.0423504165949997, - 3.145752726595, - 3.114517602595, - 3.062246401595, - 3.051930277595, - 3.027503581595, - 3.013381462595, - 3.005364834595, - 3.016963568595 - ] + // TODO args lib + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("please provide the results directory"); + exit(1); } -] -}"#; - - let measurements: Measurements = serde_json::from_str(data).unwrap(); + let results_directory = &args[1]; + + let path = Path::new(&results_directory); + let result_files = fs::read_dir(path).unwrap(); + let x: Result> = result_files + .into_iter() + .map(|f| { + f.unwrap().path() + }) + .filter(|filename| { + filename + .extension() + .and_then(|ext| ext.to_str()) + .map_or(false, |ext| ext.ends_with("json")) + }) + .map(|filename| { + println!("{:?}", filename); + let contents = fs::read_to_string(filename).unwrap(); + serde_json::from_str::(&contents) + }) + .collect(); - println!("{:?}", measurements); -} \ No newline at end of file + println!("{:?}", x); +} From 108b55bdc3e49266c55f83955c31ee4406d7d874 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 22 Jul 2021 13:50:17 -0400 Subject: [PATCH 09/81] add regression type --- performance/comparison/src/main.rs | 38 ++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index e42d879481c..7dbb3d91eb1 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -22,6 +22,28 @@ struct Measurements { results: Vec, } +struct Regression { + threshold: f64, + difference: f64, + base: Measurement, + latest: Measurement, +} + +fn regression(base: &Measurement, latest: &Measurement) -> Option { + let threshold = 1.05; // 5% regression threshold + let difference = latest.median / base.median; + if difference > threshold { + Some(Regression { + threshold: threshold, + difference: difference, + base: base.clone(), + latest: latest.clone(), + }) + } else { + None + } +} + fn main() { // TODO args lib let args: Vec = env::args().collect(); @@ -33,11 +55,9 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let x: Result> = result_files 
.into_iter() - .map(|f| { - f.unwrap().path() - }) + .map(|f| f.unwrap().path()) .filter(|filename| { filename .extension() @@ -47,9 +67,17 @@ fn main() { .map(|filename| { println!("{:?}", filename); let contents = fs::read_to_string(filename).unwrap(); - serde_json::from_str::(&contents) + let measurements: Result = serde_json::from_str(&contents); + // the way we're running these, the files will each contain exactly one measurement + measurements.map(|x| x.results[0].clone()) }) .collect(); + // TODO exit(1) when Regression is present + match x { + Err(e) => panic!("{}", e), + Ok(_) => (), + } + println!("{:?}", x); } From c0e2023c817979fa73d8a45848a8736166bcc76c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 15:52:28 -0400 Subject: [PATCH 10/81] update profiles dir path --- performance/runner/src/main.rs | 62 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index b57538bdefd..2f0eedb5ca4 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,7 +1,6 @@ -use std::{env, fs, io}; use std::path::Path; -use std::process::{Command, ExitStatus, exit}; - +use std::process::{exit, Command, ExitStatus}; +use std::{env, fs, io}; #[derive(Debug, Clone)] struct Metric<'a> { @@ -26,35 +25,44 @@ fn main() { let projects_directory = &args[1]; // to add a new metric to the test suite, simply define it in this list: - let metrics: Vec = vec![ - Metric { name:"parse", prepare: "rm -rf target/", cmd: "dbt parse --no-version-check" }, - ]; + let metrics: Vec = vec![Metric { + name: "parse", + prepare: "rm -rf target/", + cmd: "dbt parse --no-version-check", + }]; // list out all projects let path = Path::new(&projects_directory); let project_entries = fs::read_dir(path).unwrap(); - let results: Result, io::Error > = project_entries.map(|entry| { - metrics.clone().into_iter().map(|metric| { - let path = entry.as_ref().unwrap().path(); - let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); + let results: Result, io::Error> = project_entries + .map(|entry| { + metrics + .clone() + .into_iter() + .map(|metric| { + let path = entry.as_ref().unwrap().path(); + let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); - Command::new("hyperfine") - .current_dir(&path) - // warms filesystem caches by running the command first without counting it. - // alternatively we could clear them before each run - .arg("--warmup") - .arg("1") - .arg("--prepare") - .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", &projects_directory, "/../project_config/"].join("")) - .arg("--export-json") - .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) - // this prevents capture dbt output. Noisy, but good for debugging when tests fail. - .arg("--show-output") - .status() // use spawn() here instead for more information - }).collect::>>() - }).flatten().collect(); + Command::new("hyperfine") + .current_dir(&path) + // warms filesystem caches by running the command first without counting it. + // alternatively we could clear them before each run + .arg("--warmup") + .arg("1") + .arg("--prepare") + .arg(metric.prepare) + .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) + .arg("--export-json") + .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. 
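To make the 5% threshold in the "add regression type" commit above concrete, here is a small worked sketch with hypothetical medians; the numbers are illustrative only and the comparison mirrors the check inside the `regression` function:

    #[test]
    fn five_percent_threshold_examples() {
        // baseline median 3.00s vs latest median 3.20s: ratio ~1.067 > 1.05, so a Regression is reported
        assert!(3.20_f64 / 3.00_f64 > 1.05);

        // baseline median 3.00s vs latest median 3.10s: ratio ~1.033 <= 1.05, so no regression
        assert!(3.10_f64 / 3.00_f64 <= 1.05);
    }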
+ .arg("--show-output") + .status() // use spawn() here instead for more information + }) + .collect::>>() + }) + .flatten() + .collect(); // only exit with status code 0 if everything ran as expected match results { @@ -71,6 +79,6 @@ fn main() { } // everything ran as expected exit(0); - }, + } } } From 34e2c4f90bd120000ff7e06cf7a2cd1043d29c3c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 16:16:21 -0400 Subject: [PATCH 11/81] fix results output dir --- performance/runner/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 2f0eedb5ca4..5dd8d53928e 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -54,7 +54,7 @@ fn main() { .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) .arg("--export-json") - .arg([&projects_directory, "/", &metric.outfile(project_name)].join("")) + .arg(["../../results/", &metric.outfile(project_name)].join("")) // this prevents capture dbt output. Noisy, but good for debugging when tests fail. .arg("--show-output") .status() // use spawn() here instead for more information From a45c9d019253054a55d7878b704bc2044dc9d3e1 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 16:24:46 -0400 Subject: [PATCH 12/81] add new arg for runner for branch name --- .github/workflows/performance.yml | 4 ++-- performance/runner/src/main.rs | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 5053f8ad9a5..478f12eda63 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -77,7 +77,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run - run: ./runner ${{ github.workspace }}/performance/projects/ + run: ./runner ${{ github.workspace }}/performance/projects/ dev - uses: actions/upload-artifact@v2 with: name: dev-results @@ -116,7 +116,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner ${{ github.workspace }}/performance/projects/ + run: ./runner ${{ github.workspace }}/performance/projects/ latest - uses: actions/upload-artifact@v2 with: name: latest-results diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 5dd8d53928e..86c8e01cdd1 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -10,19 +10,20 @@ struct Metric<'a> { } impl Metric<'_> { - fn outfile(&self, project: &str) -> String { - [self.name, "_", project, ".json"].join("") + fn outfile(&self, project: &str, branch: &str) -> String { + [branch, "_", self.name, "_", project, ".json"].join("") } } fn main() { // TODO args lib let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("please provide the target projects directory"); + if args.len() < 3 { + println!("please provide the target projects directory and branch name"); exit(1); } let projects_directory = &args[1]; + let dbt_branch = &args[2]; // to add a new metric to the test suite, simply define it in this list: let metrics: Vec = vec![Metric { @@ -54,7 +55,9 @@ fn main() { .arg(metric.prepare) .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) .arg("--export-json") - .arg(["../../results/", &metric.outfile(project_name)].join("")) + .arg( + ["../../results/", &metric.outfile(project_name, &dbt_branch)].join(""), + ) // this prevents capture dbt output. 
Noisy, but good for debugging when tests fail. .arg("--show-output") .status() // use spawn() here instead for more information From 4c06689ff56006a2f661dd77898277c7a597b83a Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 16:54:57 -0400 Subject: [PATCH 13/81] create a tuple of measurements to group results by --- performance/comparison/src/main.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 7dbb3d91eb1..67f481f9669 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -55,7 +55,7 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let x: Result> = result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -66,13 +66,22 @@ fn main() { }) .map(|filename| { println!("{:?}", filename); - let contents = fs::read_to_string(filename).unwrap(); + let contents = fs::read_to_string(&filename).unwrap(); let measurements: Result = serde_json::from_str(&contents); + let results = measurements.map(|x| x.results[0].clone()); + let filepath = filename.into_os_string().into_string().unwrap(); + let parts: Vec<&str> = filepath.split("_").collect(); + // the way we're running these, the files will each contain exactly one measurement - measurements.map(|x| x.results[0].clone()) + results.map(|r| (parts[0].to_owned(), parts[1..].join(""), r.clone())) }) .collect(); + // TODO + // - group by project and metric + // - each group will have 2 measurements for each branch + // - perfrom regressions on each pair + // TODO exit(1) when Regression is present match x { Err(e) => panic!("{}", e), From c8bc25d11ae7a8d97b553cab7709cf9e337ffff3 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Jul 2021 17:05:08 -0400 Subject: [PATCH 14/81] add struct --- performance/comparison/src/main.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 67f481f9669..320585aff61 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -44,6 +44,12 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { } } +struct MeasurementGroup { + branch: String, + run: String, + measurement: Measurement, +} + fn main() { // TODO args lib let args: Vec = env::args().collect(); @@ -55,7 +61,7 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let x: Result> = result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -73,7 +79,11 @@ fn main() { let parts: Vec<&str> = filepath.split("_").collect(); // the way we're running these, the files will each contain exactly one measurement - results.map(|r| (parts[0].to_owned(), parts[1..].join(""), r.clone())) + results.map(|r| MeasurementGroup { + branch: parts[0].to_owned(), + run: parts[1..].join(""), + measurement: r.clone(), + }) }) .collect(); From 37b31d10c85b53dde991b54aa513278e6d8b991c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Tue, 27 Jul 2021 11:28:32 -0400 Subject: [PATCH 15/81] add derived traits to structs --- performance/comparison/src/main.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 320585aff61..d90f5aa31e1 100644 --- 
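Tying the last few commits together: with the branch argument added in "add new arg for runner for branch name", each result file name now encodes branch, metric, and project, and the comparison code recovers those pieces by splitting the name on underscores. A sketch with illustrative values, assuming the `Metric` type from `runner/src/main.rs` is in scope (for example inside a test in that file):

    let metric = Metric {
        name: "parse",
        prepare: "rm -rf target/",
        cmd: "dbt parse --no-version-check",
    };

    // [branch, "_", name, "_", project, ".json"].join("")
    assert_eq!(
        metric.outfile("01_mini_project_dont_merge", "dev"),
        "dev_parse_01_mini_project_dont_merge.json"
    );

    // splitting that name on '_' yields ["dev", "parse", "01", "mini", ...]:
    // parts[0] is the branch, and the remaining parts identify the metric and project,
    // which is how a dev measurement gets grouped with its counterpart from the other branch.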
a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -4,7 +4,7 @@ use std::path::Path; use std::process::exit; use std::{env, fs}; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct Measurement { command: String, mean: f64, @@ -17,11 +17,12 @@ struct Measurement { times: Vec, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct Measurements { results: Vec, } +#[derive(Debug, Clone, PartialEq)] struct Regression { threshold: f64, difference: f64, @@ -34,8 +35,8 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { let difference = latest.median / base.median; if difference > threshold { Some(Regression { - threshold: threshold, - difference: difference, + threshold, + difference, base: base.clone(), latest: latest.clone(), }) @@ -44,6 +45,7 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { } } +#[derive(Debug, Clone, PartialEq)] struct MeasurementGroup { branch: String, run: String, @@ -61,7 +63,7 @@ fn main() { let path = Path::new(&results_directory); let result_files = fs::read_dir(path).unwrap(); - let x: Result> = result_files + let measurements: Result> = result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -90,13 +92,15 @@ fn main() { // TODO // - group by project and metric // - each group will have 2 measurements for each branch - // - perfrom regressions on each pair + // - perform regressions on each pair + // dev_parse_02_mini_project_dont_merge.json + // dev_parse_01_mini_project_dont_merge.json // TODO exit(1) when Regression is present - match x { + match measurements { Err(e) => panic!("{}", e), Ok(_) => (), } - println!("{:?}", x); + println!("{:?}", measurements); } From 4ddba7e44c6b232e5348337fd4dc8b70742785cc Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Tue, 27 Jul 2021 12:44:06 -0400 Subject: [PATCH 16/81] --wip-- --- performance/comparison/src/main.rs | 116 +++++++++++++++++---------- performance/project_config/.user.yml | 1 + 2 files changed, 74 insertions(+), 43 deletions(-) create mode 100644 performance/project_config/.user.yml diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index d90f5aa31e1..e3335277f83 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -26,18 +26,25 @@ struct Measurements { struct Regression { threshold: f64, difference: f64, - base: Measurement, + baseline: Measurement, latest: Measurement, } -fn regression(base: &Measurement, latest: &Measurement) -> Option { +#[derive(Debug, Clone, PartialEq)] +struct MeasurementGroup { + version: String, + run: String, + measurement: Measurement, +} + +fn regression(baseline: &Measurement, latest: &Measurement) -> Option { let threshold = 1.05; // 5% regression threshold - let difference = latest.median / base.median; + let difference = latest.median / baseline.median; if difference > threshold { Some(Regression { threshold, difference, - base: base.clone(), + baseline: baseline.clone(), latest: latest.clone(), }) } else { @@ -45,25 +52,12 @@ fn regression(base: &Measurement, latest: &Measurement) -> Option { } } -#[derive(Debug, Clone, PartialEq)] -struct MeasurementGroup { - branch: String, - run: String, - measurement: Measurement, -} - -fn main() { - // TODO args lib - let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("please provide the results directory"); - exit(1); - } - let 
results_directory = &args[1]; +fn measurements_from_files( + results_directory: &Path, +) -> std::result::Result, std::io::Error> { + let result_files = fs::read_dir(results_directory)?; - let path = Path::new(&results_directory); - let result_files = fs::read_dir(path).unwrap(); - let measurements: Result> = result_files + result_files .into_iter() .map(|f| f.unwrap().path()) .filter(|filename| { @@ -74,33 +68,69 @@ fn main() { }) .map(|filename| { println!("{:?}", filename); - let contents = fs::read_to_string(&filename).unwrap(); - let measurements: Result = serde_json::from_str(&contents); - let results = measurements.map(|x| x.results[0].clone()); - let filepath = filename.into_os_string().into_string().unwrap(); - let parts: Vec<&str> = filepath.split("_").collect(); - - // the way we're running these, the files will each contain exactly one measurement - results.map(|r| MeasurementGroup { - branch: parts[0].to_owned(), - run: parts[1..].join(""), - measurement: r.clone(), - }) + let contents = fs::read_to_string(&filename); + contents.map(|c| serde_json::from_str::(&c).unwrap()) //TODO rm unwrap! }) - .collect(); + .collect() +} + +fn detect_regressions(measurements: Vec) -> Vec { + panic!() +} + +fn main() { + // TODO args lib + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("please provide the results directory"); + exit(1); + } + let results_directory = &args[1]; + + let foo = measurements_from_files(Path::new(&results_directory)); + foo.map(|x| detect_regressions(x)); + + // let measurements: Result)>> = result_files + // .into_iter() + // .map(|f| f.unwrap().path()) + // .filter(|filename| { + // filename + // .extension() + // .and_then(|ext| ext.to_str()) + // .map_or(false, |ext| ext.ends_with("json")) + // }) + // .map(|filename| { + // println!("{:?}", filename); + // let contents = fs::read_to_string(&filename).unwrap(); + // let measurements: Result = serde_json::from_str(&contents); + // let results = measurements.map(|x| x.results[0].clone()); + // let filepath = filename.into_os_string().into_string().unwrap(); + // let parts: Vec<&str> = filepath.split("_").collect(); + + // // the way we're running these, the files will each contain exactly one measurement + // results.map(|r| MeasurementGroup { + // version: parts[0].to_owned(), + // run: parts[1..].join(""), + // measurement: r.clone(), + // }) + // }) + // .collect(); // TODO // - group by project and metric // - each group will have 2 measurements for each branch // - perform regressions on each pair - // dev_parse_02_mini_project_dont_merge.json - // dev_parse_01_mini_project_dont_merge.json + // latest_parse_02_mini_project_dont_merge.json + // latest_parse_01_mini_project_dont_merge.json + // baseline_parse_02_mini_project_dont_merge.json + // baseline_parse_01_mini_project_dont_merge.json + // command, project -> 2 version + // let x: Vec<(&str, Vec)>; // TODO exit(1) when Regression is present - match measurements { - Err(e) => panic!("{}", e), - Ok(_) => (), - } - - println!("{:?}", measurements); + // match foo { + // Err(&e) => panic!("{}", &e), + // Ok(_) => (), + // } + foo.clone().map(|f| println!("{:?}", f)).unwrap(); } diff --git a/performance/project_config/.user.yml b/performance/project_config/.user.yml new file mode 100644 index 00000000000..1870bd1ea47 --- /dev/null +++ b/performance/project_config/.user.yml @@ -0,0 +1 @@ +id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b From 45bb955b55fc37f965ceb7b8299fe719de4f93f6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 28 Jul 
2021 15:54:04 -0400 Subject: [PATCH 17/81] still wip but compiles --- performance/comparison/src/main.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index e3335277f83..6492b8483df 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -87,8 +87,8 @@ fn main() { } let results_directory = &args[1]; - let foo = measurements_from_files(Path::new(&results_directory)); - foo.map(|x| detect_regressions(x)); + let measurements = measurements_from_files(Path::new(&results_directory)); + let regressions = measurements.map(|x| detect_regressions(x)); // let measurements: Result)>> = result_files // .into_iter() @@ -132,5 +132,10 @@ fn main() { // Err(&e) => panic!("{}", &e), // Ok(_) => (), // } - foo.clone().map(|f| println!("{:?}", f)).unwrap(); + + // print the regressions or throw + match regressions { + Err(e) => panic!("{}", e), + Ok(rs) => println!("{:?}", rs) + } } From d5662ef34c2784a0adab45b966da1725094a1b23 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 16:33:43 -0400 Subject: [PATCH 18/81] wrap up comparsion logic --- performance/comparison/Cargo.lock | 37 ++++++++ performance/comparison/Cargo.toml | 2 + performance/comparison/src/main.rs | 147 ++++++++++++++++++----------- 3 files changed, 130 insertions(+), 56 deletions(-) diff --git a/performance/comparison/Cargo.lock b/performance/comparison/Cargo.lock index 4bf9976c9a0..76e80198abc 100644 --- a/performance/comparison/Cargo.lock +++ b/performance/comparison/Cargo.lock @@ -6,8 +6,25 @@ version = 3 name = "comparison" version = "0.1.0" dependencies = [ + "itertools", "serde", "serde_json", + "thiserror", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", ] [[package]] @@ -82,6 +99,26 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "thiserror" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-xid" version = "0.2.2" diff --git a/performance/comparison/Cargo.toml b/performance/comparison/Cargo.toml index df15857b46e..59ee1954fbd 100644 --- a/performance/comparison/Cargo.toml +++ b/performance/comparison/Cargo.toml @@ -4,5 +4,7 @@ version = "0.1.0" edition = "2018" [dependencies] +itertools = "0.10.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +thiserror = "1.0.26" diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 6492b8483df..c3ed7e607fc 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -1,8 +1,12 @@ +use itertools::Itertools; use serde::{Deserialize, Serialize}; -use serde_json::Result; -use std::path::Path; +use std::path::{ + Path, + PathBuf +}; 
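One detail worth calling out before the new comparison logic below: `Itertools::group_by` only merges consecutive elements with equal keys, which is why the measurement groups are sorted before grouping. A standalone illustration of that behavior, assuming itertools 0.10 as pinned in the Cargo.toml above; the tuples are made-up stand-ins for real measurement groups:

    use itertools::Itertools;

    fn main() {
        // two results for the "parse01" run (baseline and latest) plus one stray "parse02" result
        let mut files = vec![("parse01", "latest"), ("parse02", "latest"), ("parse01", "baseline")];
        files.sort(); // group_by only merges *adjacent* equal keys, so sort first

        let group_sizes: Vec<(&str, usize)> = files
            .into_iter()
            .group_by(|(run, _)| *run)
            .into_iter()
            .map(|(run, group)| (run, group.count()))
            .collect();

        assert_eq!(group_sizes, vec![("parse01", 2), ("parse02", 1)]);
    }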
use std::process::exit; -use std::{env, fs}; +use std::{env, fs, io}; +use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct Measurement { @@ -37,6 +41,20 @@ struct MeasurementGroup { measurement: Measurement, } +#[derive(Error, Debug)] +enum TestError { + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] + BadJSONErr(PathBuf, serde_json::Error), + #[error("ReadErr: The file cannot be read. \nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] + ReadErr(PathBuf, io::Error), + #[error("MissingFilenameErr: The path provided does not specify a file. \nFilepath: {}\n", .0.to_string_lossy().into_owned())] + MissingFilenameErr(PathBuf), + #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + FilenameNotUnicodeErr(PathBuf), + #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + BadFileContentsErr(PathBuf, io::Error), +} + fn regression(baseline: &Measurement, latest: &Measurement) -> Option { let threshold = 1.05; // 5% regression threshold let difference = latest.median / baseline.median; @@ -52,10 +70,13 @@ fn regression(baseline: &Measurement, latest: &Measurement) -> Option std::result::Result, std::io::Error> { - let result_files = fs::read_dir(results_directory)?; +) -> Result, TestError> { + let result_files = fs::read_dir(results_directory) + .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), e)))?; result_files .into_iter() @@ -68,14 +89,55 @@ fn measurements_from_files( }) .map(|filename| { println!("{:?}", filename); - let contents = fs::read_to_string(&filename); - contents.map(|c| serde_json::from_str::(&c).unwrap()) //TODO rm unwrap! 
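The `TestError` enum above leans on `thiserror`: the `#[error("...")]` attribute on each variant generates the `Display` implementation that the eventual `panic!("{}", e)` prints. A minimal standalone sketch of that mechanism, using a hypothetical `DemoError` type and assuming thiserror 1.0 as pinned above:

    use std::path::PathBuf;
    use thiserror::Error;

    #[derive(Error, Debug)]
    enum DemoError {
        #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())]
        MissingFilenameErr(PathBuf),
    }

    fn main() {
        let err = DemoError::MissingFilenameErr(PathBuf::from("performance/results"));
        // Display is derived from the #[error(...)] attribute
        println!("{}", err);
    }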
+ fs::read_to_string(&filename) + .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), e))) + .and_then(|contents| + serde_json::from_str::(&contents) + .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), e))) + ) + .map(|m| (filename, m)) }) .collect() } -fn detect_regressions(measurements: Vec) -> Vec { - panic!() +// given a list of filename-measurement pairs, detect any regressions by grouping +// measurements together by filename +fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result, TestError> { + let mut measurement_groups: Vec = measurements + .into_iter() + .map(|(p, m)| { + p.file_name() + .ok_or_else(|| TestError::MissingFilenameErr(p.to_path_buf())) + .and_then(|name| name.to_str().ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf()))) + .map(|name| { + let parts: Vec<&str> = name.split("_").collect(); + MeasurementGroup { + version: parts[0].to_owned(), + run: parts[1..].join(""), + measurement: m.clone(), + } + }) + }) + .collect::, TestError>>()?; + + measurement_groups + .sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); + + // locking up mutation + let sorted_measurement_groups = measurement_groups; + + let x: Vec = sorted_measurement_groups + .into_iter() + .group_by(|x| (x.run.clone(), x.version.clone())) + .into_iter() + .map(|(_, group)| { + let g: Vec = group.collect(); + regression(&g[0].measurement, &g[1].measurement) + }) + .flatten() + .collect(); + + Ok(x) } fn main() { @@ -87,55 +149,28 @@ fn main() { } let results_directory = &args[1]; - let measurements = measurements_from_files(Path::new(&results_directory)); - let regressions = measurements.map(|x| detect_regressions(x)); - - // let measurements: Result)>> = result_files - // .into_iter() - // .map(|f| f.unwrap().path()) - // .filter(|filename| { - // filename - // .extension() - // .and_then(|ext| ext.to_str()) - // .map_or(false, |ext| ext.ends_with("json")) - // }) - // .map(|filename| { - // println!("{:?}", filename); - // let contents = fs::read_to_string(&filename).unwrap(); - // let measurements: Result = serde_json::from_str(&contents); - // let results = measurements.map(|x| x.results[0].clone()); - // let filepath = filename.into_os_string().into_string().unwrap(); - // let parts: Vec<&str> = filepath.split("_").collect(); - - // // the way we're running these, the files will each contain exactly one measurement - // results.map(|r| MeasurementGroup { - // version: parts[0].to_owned(), - // run: parts[1..].join(""), - // measurement: r.clone(), - // }) - // }) - // .collect(); - - // TODO - // - group by project and metric - // - each group will have 2 measurements for each branch - // - perform regressions on each pair - // latest_parse_02_mini_project_dont_merge.json - // latest_parse_01_mini_project_dont_merge.json - // baseline_parse_02_mini_project_dont_merge.json - // baseline_parse_01_mini_project_dont_merge.json - // command, project -> 2 version - // let x: Vec<(&str, Vec)>; - - // TODO exit(1) when Regression is present - // match foo { - // Err(&e) => panic!("{}", &e), - // Ok(_) => (), - // } + let regressions = measurements_from_files(Path::new(&results_directory)) + .and_then(|v| { + let v_next: Vec<(PathBuf, Measurement)> = v.into_iter() + // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` + .map(|(p, ms)| (p, ms.results[0].clone())) + .collect(); + + detect_regressions(&v_next) + }); - // print the regressions or throw match regressions { Err(e) => panic!("{}", e), - 
Ok(rs) => println!("{:?}", rs) + Ok(rs) => match rs[..] { + [] => println!("congrats! no regressions"), + _ => { + for r in rs { + println!("{:?}", r); + } + println!("the above regressions were found."); + exit(1) + }, + }, + } } From 80951ae973a514aae14fedc6b6ec61995b1ece22 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 16:34:47 -0400 Subject: [PATCH 19/81] fmt --- performance/comparison/src/main.rs | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index c3ed7e607fc..5d7be748429 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -1,9 +1,6 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::path::{ - Path, - PathBuf -}; +use std::path::{Path, PathBuf}; use std::process::exit; use std::{env, fs, io}; use thiserror::Error; @@ -91,10 +88,10 @@ fn measurements_from_files( println!("{:?}", filename); fs::read_to_string(&filename) .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), e))) - .and_then(|contents| + .and_then(|contents| { serde_json::from_str::(&contents) .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), e))) - ) + }) .map(|m| (filename, m)) }) .collect() @@ -102,14 +99,19 @@ fn measurements_from_files( // given a list of filename-measurement pairs, detect any regressions by grouping // measurements together by filename -fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result, TestError> { +fn detect_regressions( + measurements: &[(PathBuf, Measurement)], +) -> Result, TestError> { let mut measurement_groups: Vec = measurements .into_iter() .map(|(p, m)| { p.file_name() .ok_or_else(|| TestError::MissingFilenameErr(p.to_path_buf())) - .and_then(|name| name.to_str().ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf()))) - .map(|name| { + .and_then(|name| { + name.to_str() + .ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf())) + }) + .map(|name| { let parts: Vec<&str> = name.split("_").collect(); MeasurementGroup { version: parts[0].to_owned(), @@ -119,13 +121,12 @@ fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result, TestError>>()?; - - measurement_groups - .sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); + + measurement_groups.sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); // locking up mutation let sorted_measurement_groups = measurement_groups; - + let x: Vec = sorted_measurement_groups .into_iter() .group_by(|x| (x.run.clone(), x.version.clone())) @@ -136,7 +137,7 @@ fn detect_regressions(measurements: &[(PathBuf, Measurement)]) -> Result = v.into_iter() - // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` - .map(|(p, ms)| (p, ms.results[0].clone())) - .collect(); - - detect_regressions(&v_next) - }); + let regressions = measurements_from_files(Path::new(&results_directory)).and_then(|v| { + let v_next: Vec<(PathBuf, Measurement)> = v + .into_iter() + // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` + .map(|(p, ms)| (p, ms.results[0].clone())) + .collect(); + + detect_regressions(&v_next) + }); match regressions { Err(e) => panic!("{}", e), @@ -169,8 +170,7 @@ fn main() { } println!("the above regressions were found."); exit(1) - }, + } }, - } } From 5466d474c5a36bd5f50b0b8e8fdd8a902a9983ed Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 
17:07:41 -0400 Subject: [PATCH 20/81] add test for exception display --- performance/comparison/src/main.rs | 64 +++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 5d7be748429..762fe5056dc 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -40,16 +40,16 @@ struct MeasurementGroup { #[derive(Error, Debug)] enum TestError { - #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] - BadJSONErr(PathBuf, serde_json::Error), - #[error("ReadErr: The file cannot be read. \nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.to_string())] - ReadErr(PathBuf, io::Error), - #[error("MissingFilenameErr: The path provided does not specify a file. \nFilepath: {}\n", .0.to_string_lossy().into_owned())] + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadJSONErr(PathBuf, Option), + #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + ReadErr(PathBuf, Option), + #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] MissingFilenameErr(PathBuf), #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] FilenameNotUnicodeErr(PathBuf), - #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] - BadFileContentsErr(PathBuf, io::Error), + #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadFileContentsErr(PathBuf, Option), } fn regression(baseline: &Measurement, latest: &Measurement) -> Option { @@ -73,7 +73,7 @@ fn measurements_from_files( results_directory: &Path, ) -> Result, TestError> { let result_files = fs::read_dir(results_directory) - .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), e)))?; + .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), Some(e))))?; result_files .into_iter() @@ -87,10 +87,10 @@ fn measurements_from_files( .map(|filename| { println!("{:?}", filename); fs::read_to_string(&filename) - .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), e))) + .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), Some(e)))) .and_then(|contents| { serde_json::from_str::(&contents) - .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), e))) + .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), Some(e)))) }) .map(|m| (filename, m)) }) @@ -174,3 +174,47 @@ fn main() { }, } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_messages() { + let pairs = vec![ + ( + TestError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"BadJSONErr: JSON in file cannot be deserialized as expected. 
+Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + TestError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"ReadErr: The file cannot be read. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + TestError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"MissingFilenameErr: The path provided does not specify a file. +Filepath: dummy/path/no_file/"# + ), + ( + TestError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. +Filepath: dummy/path/no_file/"# + ), + ( + TestError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), + r#"BadFileContentsErr: Check that the file exists and is readable. +Filepath: dummy/path/filenotexist.json +Originating Exception:None"# + ), + ]; + + for (err, msg) in pairs { + assert_eq!(format!("{}", err), msg) + } + } + +} From fa78102eaf4b1512c2abc46375a59931985c3921 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:23:30 -0400 Subject: [PATCH 21/81] add comparison to workflow --- .github/workflows/performance.yml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 478f12eda63..f948afa8427 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -127,14 +127,29 @@ jobs: name: Compare Results runs-on: ubuntu-latest steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: "test rust comparator" + uses: actions-rs/cargo@v1 + with: + command: test + args: --release --manifest-path performance/comparison/Cargo.toml + - name: "build runnable rust comparator" + uses: actions-rs/cargo@v1 + with: + command: build + args: --release --manifest-path performance/comparison/Cargo.toml - uses: actions/download-artifact@v2 with: name: dev-results - - name: ls result files - run: ls - uses: actions/download-artifact@v2 with: name: latest-results - name: ls result files run: ls - # TODO actually compare + - name: "run comparator" + run: ./performance/comparison/target/release/comparison performance/results/ From 8a5e9b71a57d651889ebd0d2d898aa1f2360e5ca Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:50:33 -0400 Subject: [PATCH 22/81] add some output to comparitor --- performance/comparison/src/main.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 762fe5056dc..864ff086ce4 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::process::exit; use std::{env, fs, io}; +use std::borrow::Cow; use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -157,6 +158,11 @@ fn main() { .map(|(p, ms)| (p, ms.results[0].clone())) .collect(); + println!("checking regressions with the following measurements:"); + for (path, _) in &v_next { + println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) + } + detect_regressions(&v_next) }); From 429396aa0293fdd58acbadb1a4acecb1d19df32f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:55:26 -0400 Subject: [PATCH 23/81] move step dependencies around 
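The only contract between the measurement jobs and this comparison step is the results file naming scheme: the runner writes one JSON file per branch/metric/project named <branch>_<metric>_<project>.json, and the comparator recovers its grouping key by splitting the filename on underscores. A simplified sketch of that key derivation (not the literal implementation; the file names are just examples):

    // splits "<branch>_<metric>_<project>.json" into (branch, grouping key)
    fn split_result_filename(name: &str) -> (String, String) {
        let parts: Vec<&str> = name.split('_').collect();
        (parts[0].to_owned(), parts[1..].join("_"))
    }

    fn main() {
        for name in vec![
            "baseline_parse_01_mini_project_dont_merge.json",
            "latest_parse_01_mini_project_dont_merge.json",
        ] {
            let (branch, run) = split_result_filename(name);
            println!("branch: {:10} run: {}", branch, run);
        }
        // both files share the same run key, so they form one baseline-vs-latest pair
    }
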
--- .github/workflows/performance.yml | 70 +++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index f948afa8427..1fffbbe42a7 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -17,7 +17,7 @@ jobs: # runs any tests associated with the runner # these tests make sure the runner logic is correct # if there are no tests, it still quickly builds the project to fail fast - test-rust: + test-runner: name: Test Runner runs-on: ubuntu-latest steps: @@ -32,9 +32,27 @@ jobs: command: test args: --manifest-path performance/runner/Cargo.toml + # runs any tests associated with the comparator + # these tests make sure the comparison logic is correct + # if there are no tests, it still quickly builds the project to fail fast + test-comparator: + name: Test Comparator + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + args: --manifest-path performance/comparison/Cargo.toml + # build an optimized binary to be used as the runner in later steps - build-rust: - needs: [test-rust] + build-runner: + needs: [test-runner] name: Build Runner runs-on: ubuntu-latest env: @@ -55,9 +73,32 @@ jobs: name: runner path: performance/runner/target/release/runner + # build an optimized binary to be used as the comparator in later steps + build-comparator: + needs: [test-comparator] + name: Build Runner + runs-on: ubuntu-latest + env: + RUSTFLAGS: "-D warnings" + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: build + args: --release --manifest-path performance/comparison/Cargo.toml + - uses: actions/upload-artifact@v2 + with: + name: comparison + path: performance/comparison/target/release/comparison + # run the performance measurements on the current or default branch measure-dev: - needs: [build-rust] + needs: [build-runner, build-comparator] name: Measure Dev Branch runs-on: ubuntu-latest steps: @@ -85,7 +126,7 @@ jobs: # run the performance measurements on the latest release branch measure-latest: - needs: [build-rust] + needs: [build-runner, build-comparator] name: Measure Latest Branch runs-on: ubuntu-latest steps: @@ -133,16 +174,6 @@ jobs: profile: minimal toolchain: stable override: true - - name: "test rust comparator" - uses: actions-rs/cargo@v1 - with: - command: test - args: --release --manifest-path performance/comparison/Cargo.toml - - name: "build runnable rust comparator" - uses: actions-rs/cargo@v1 - with: - command: build - args: --release --manifest-path performance/comparison/Cargo.toml - uses: actions/download-artifact@v2 with: name: dev-results @@ -151,5 +182,10 @@ jobs: name: latest-results - name: ls result files run: ls - - name: "run comparator" - run: ./performance/comparison/target/release/comparison performance/results/ + - uses: actions/download-artifact@v2 + with: + name: comparator + - name: change permissions + run: chmod +x ./comparison + - name: run comparator + run: ./comparison performance/results/ From f4775d76733c0287fe24efe2278d02e235b43e8c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 17:56:29 -0400 Subject: [PATCH 24/81] change job name --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 1fffbbe42a7..1ae92bf729e 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -76,7 +76,7 @@ jobs: # build an optimized binary to be used as the comparator in later steps build-comparator: needs: [test-comparator] - name: Build Runner + name: Build Comparator runs-on: ubuntu-latest env: RUSTFLAGS: "-D warnings" From 6ba837d73ddf0d9a73c337a36d000a35cbc588b2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 18:03:04 -0400 Subject: [PATCH 25/81] fix download artifact name --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 1ae92bf729e..ed3facd0cc6 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -184,7 +184,7 @@ jobs: run: ls - uses: actions/download-artifact@v2 with: - name: comparator + name: comparison - name: change permissions run: chmod +x ./comparison - name: run comparator From 3d4a82cca2087ea686ab3037e4dd50e2cf094079 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 2 Aug 2021 18:09:31 -0400 Subject: [PATCH 26/81] add comments --- .github/workflows/performance.yml | 4 ++++ performance/comparison/src/main.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index ed3facd0cc6..72e16951eff 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -125,6 +125,8 @@ jobs: path: performance/results/ # run the performance measurements on the latest release branch + # this part takes by far the longest, so we do everything we can first + # so the job fails fast. measure-latest: needs: [build-runner, build-comparator] name: Measure Latest Branch @@ -163,6 +165,8 @@ jobs: name: latest-results path: performance/results/ + # run the comparison app on the result output generated from measuring + # the two branches. Exits with non-zero code if a regression is detected. compare-results: needs: [measure-dev, measure-latest] name: Compare Results diff --git a/performance/comparison/src/main.rs b/performance/comparison/src/main.rs index 864ff086ce4..63bf21b2ed8 100644 --- a/performance/comparison/src/main.rs +++ b/performance/comparison/src/main.rs @@ -142,6 +142,7 @@ fn detect_regressions( Ok(x) } +// TODO merge runner and comparison projects into one with two sub commands fn main() { // TODO args lib let args: Vec = env::args().collect(); @@ -160,6 +161,7 @@ fn main() { println!("checking regressions with the following measurements:"); for (path, _) in &v_next { + // TODO this printed nothing. What's the size of this vector? 
println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) } From 2f7ab2d038210088ff14fd7b2e2db4821f14b638 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:04:45 -0400 Subject: [PATCH 27/81] unify rust apps as subcommands --- .github/workflows/performance.yml | 58 +--- performance/comparison/Cargo.lock | 126 -------- performance/comparison/Cargo.toml | 10 - performance/runner/Cargo.lock | 300 ++++++++++++++++++ performance/runner/Cargo.toml | 7 +- .../src/main.rs => runner/src/calculate.rs} | 44 +-- performance/runner/src/main.rs | 114 ++----- performance/runner/src/measure.rs | 97 ++++++ 8 files changed, 459 insertions(+), 297 deletions(-) delete mode 100644 performance/comparison/Cargo.lock delete mode 100644 performance/comparison/Cargo.toml rename performance/{comparison/src/main.rs => runner/src/calculate.rs} (89%) create mode 100644 performance/runner/src/measure.rs diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 72e16951eff..7267503bd24 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -16,7 +16,6 @@ jobs: # runs any tests associated with the runner # these tests make sure the runner logic is correct - # if there are no tests, it still quickly builds the project to fail fast test-runner: name: Test Runner runs-on: ubuntu-latest @@ -32,24 +31,6 @@ jobs: command: test args: --manifest-path performance/runner/Cargo.toml - # runs any tests associated with the comparator - # these tests make sure the comparison logic is correct - # if there are no tests, it still quickly builds the project to fail fast - test-comparator: - name: Test Comparator - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path performance/comparison/Cargo.toml - # build an optimized binary to be used as the runner in later steps build-runner: needs: [test-runner] @@ -73,32 +54,9 @@ jobs: name: runner path: performance/runner/target/release/runner - # build an optimized binary to be used as the comparator in later steps - build-comparator: - needs: [test-comparator] - name: Build Comparator - runs-on: ubuntu-latest - env: - RUSTFLAGS: "-D warnings" - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: build - args: --release --manifest-path performance/comparison/Cargo.toml - - uses: actions/upload-artifact@v2 - with: - name: comparison - path: performance/comparison/target/release/comparison - # run the performance measurements on the current or default branch measure-dev: - needs: [build-runner, build-comparator] + needs: [build-runner] name: Measure Dev Branch runs-on: ubuntu-latest steps: @@ -118,7 +76,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run - run: ./runner ${{ github.workspace }}/performance/projects/ dev + run: ./runner -p ${{ github.workspace }}/performance/projects/ -b dev - uses: actions/upload-artifact@v2 with: name: dev-results @@ -128,7 +86,7 @@ jobs: # this part takes by far the longest, so we do everything we can first # so the job fails fast. 
measure-latest: - needs: [build-runner, build-comparator] + needs: [build-runner] name: Measure Latest Branch runs-on: ubuntu-latest steps: @@ -159,7 +117,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner ${{ github.workspace }}/performance/projects/ latest + run: ./runner -p ${{ github.workspace }}/performance/projects/ -b latest - uses: actions/upload-artifact@v2 with: name: latest-results @@ -167,7 +125,7 @@ jobs: # run the comparison app on the result output generated from measuring # the two branches. Exits with non-zero code if a regression is detected. - compare-results: + calculate-regressions: needs: [measure-dev, measure-latest] name: Compare Results runs-on: ubuntu-latest @@ -190,6 +148,6 @@ jobs: with: name: comparison - name: change permissions - run: chmod +x ./comparison - - name: run comparator - run: ./comparison performance/results/ + run: chmod +x ./runner + - name: run calculation + run: ./runner calculate -r performance/results/ diff --git a/performance/comparison/Cargo.lock b/performance/comparison/Cargo.lock deleted file mode 100644 index 76e80198abc..00000000000 --- a/performance/comparison/Cargo.lock +++ /dev/null @@ -1,126 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "comparison" -version = "0.1.0" -dependencies = [ - "itertools", - "serde", - "serde_json", - "thiserror", -] - -[[package]] -name = "either" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" - -[[package]] -name = "itertools" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" - -[[package]] -name = "proc-macro2" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "ryu" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" - -[[package]] -name = "serde" -version = "1.0.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "syn" -version = "1.0.74" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "thiserror" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" diff --git a/performance/comparison/Cargo.toml b/performance/comparison/Cargo.toml deleted file mode 100644 index 59ee1954fbd..00000000000 --- a/performance/comparison/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "comparison" -version = "0.1.0" -edition = "2018" - -[dependencies] -itertools = "0.10.1" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "1.0.26" diff --git a/performance/runner/Cargo.lock b/performance/runner/Cargo.lock index 0832b7312b5..8b90cedfef8 100644 --- a/performance/runner/Cargo.lock +++ b/performance/runner/Cargo.lock @@ -2,6 +2,306 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + [[package]] name = "runner" version = "0.1.0" +dependencies = [ + "itertools", + "serde", + "serde_json", + "structopt", + "thiserror", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.127" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.127" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/performance/runner/Cargo.toml b/performance/runner/Cargo.toml index 77ff9695b77..a1581255689 100644 --- a/performance/runner/Cargo.toml +++ b/performance/runner/Cargo.toml @@ -3,6 +3,9 @@ name = "runner" version = "0.1.0" edition = "2018" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] +itertools = "0.10.1" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +structopt = "0.3" +thiserror = "1.0.26" diff --git a/performance/comparison/src/main.rs b/performance/runner/src/calculate.rs similarity index 89% rename from performance/comparison/src/main.rs rename to performance/runner/src/calculate.rs index 63bf21b2ed8..c2e0d592b4b 100644 --- a/performance/comparison/src/main.rs +++ b/performance/runner/src/calculate.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; 
use std::process::exit; -use std::{env, fs, io}; +use std::{fs, io}; use std::borrow::Cow; use thiserror::Error; @@ -25,7 +25,7 @@ struct Measurements { } #[derive(Debug, Clone, PartialEq)] -struct Regression { +pub struct Regression { threshold: f64, difference: f64, baseline: Measurement, @@ -40,7 +40,7 @@ struct MeasurementGroup { } #[derive(Error, Debug)] -enum TestError { +pub enum TestError { #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadJSONErr(PathBuf, Option), #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] @@ -100,7 +100,7 @@ fn measurements_from_files( // given a list of filename-measurement pairs, detect any regressions by grouping // measurements together by filename -fn detect_regressions( +fn calculate_regressions( measurements: &[(PathBuf, Measurement)], ) -> Result, TestError> { let mut measurement_groups: Vec = measurements @@ -142,17 +142,8 @@ fn detect_regressions( Ok(x) } -// TODO merge runner and comparison projects into one with two sub commands -fn main() { - // TODO args lib - let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("please provide the results directory"); - exit(1); - } - let results_directory = &args[1]; - - let regressions = measurements_from_files(Path::new(&results_directory)).and_then(|v| { +pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { + measurements_from_files(Path::new(&results_directory)).and_then(|v| { let v_next: Vec<(PathBuf, Measurement)> = v .into_iter() // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` @@ -165,21 +156,20 @@ fn main() { println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) } - detect_regressions(&v_next) - }); + calculate_regressions(&v_next) + }) +} +pub fn exit_properly(regressions: &[Regression]) { match regressions { - Err(e) => panic!("{}", e), - Ok(rs) => match rs[..] { - [] => println!("congrats! no regressions"), - _ => { - for r in rs { - println!("{:?}", r); - } - println!("the above regressions were found."); - exit(1) + [] => println!("congrats! 
no regressions"), + _ => { + for r in regressions { + println!("{:?}", r); } - }, + println!("the above regressions were found."); + exit(1) + } } } diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 86c8e01cdd1..d38d823870c 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,87 +1,37 @@ -use std::path::Path; -use std::process::{exit, Command, ExitStatus}; -use std::{env, fs, io}; - -#[derive(Debug, Clone)] -struct Metric<'a> { - name: &'a str, - prepare: &'a str, - cmd: &'a str, -} - -impl Metric<'_> { - fn outfile(&self, project: &str, branch: &str) -> String { - [branch, "_", self.name, "_", project, ".json"].join("") - } +extern crate structopt; + +mod calculate; +mod measure; + +use std::path::PathBuf; +use structopt::StructOpt; + +#[derive(Clone, Debug, StructOpt)] +#[structopt(name = "performance", about = "performance regression testing runner")] +enum Opt { + #[structopt(name = "measure")] + Measure { + #[structopt(parse(from_os_str))] + #[structopt(short)] + projects_dir: PathBuf, + #[structopt(short)] + branch_name: String, + }, + #[structopt(name = "calculate")] + Calculate { + #[structopt(parse(from_os_str))] + #[structopt(short)] + results_dir: PathBuf, + }, } fn main() { - // TODO args lib - let args: Vec = env::args().collect(); - if args.len() < 3 { - println!("please provide the target projects directory and branch name"); - exit(1); - } - let projects_directory = &args[1]; - let dbt_branch = &args[2]; - - // to add a new metric to the test suite, simply define it in this list: - let metrics: Vec = vec![Metric { - name: "parse", - prepare: "rm -rf target/", - cmd: "dbt parse --no-version-check", - }]; - - // list out all projects - let path = Path::new(&projects_directory); - let project_entries = fs::read_dir(path).unwrap(); - - let results: Result, io::Error> = project_entries - .map(|entry| { - metrics - .clone() - .into_iter() - .map(|metric| { - let path = entry.as_ref().unwrap().path(); - let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); - - Command::new("hyperfine") - .current_dir(&path) - // warms filesystem caches by running the command first without counting it. - // alternatively we could clear them before each run - .arg("--warmup") - .arg("1") - .arg("--prepare") - .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) - .arg("--export-json") - .arg( - ["../../results/", &metric.outfile(project_name, &dbt_branch)].join(""), - ) - // this prevents capture dbt output. Noisy, but good for debugging when tests fail. - .arg("--show-output") - .status() // use spawn() here instead for more information - }) - .collect::>>() - }) - .flatten() - .collect(); - - // only exit with status code 0 if everything ran as expected - match results { - // if dispatch of any of the commands failed, panic with that error - Err(e) => panic!("{:?}", e), - Ok(statuses) => { - for status in statuses { - match status.code() { - None => (), - Some(0) => (), - // if any child command exited with a non zero status code, exit with the same one here. 
- Some(nonzero) => exit(nonzero), - } - } - // everything ran as expected - exit(0); - } + match Opt::from_args() { + Opt::Measure { projects_dir, branch_name } => { + measure::proper_exit(measure::measure(&projects_dir, &branch_name).unwrap()) + }, + Opt::Calculate { results_dir } => { + calculate::exit_properly(&calculate::regressions(&results_dir).unwrap()) + }, } } diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs new file mode 100644 index 00000000000..4bf8cf20f91 --- /dev/null +++ b/performance/runner/src/measure.rs @@ -0,0 +1,97 @@ +extern crate structopt; + +use std::path::PathBuf; +use std::process::{exit, Command, ExitStatus}; +use std::{fs, io}; +use structopt::StructOpt; + +#[derive(Clone, Debug, StructOpt)] +#[structopt(name = "performance", about = "performance regression testing runner")] +enum Performance { + #[structopt(name = "measure")] + Measure { + #[structopt(parse(from_os_str))] + #[structopt(short)] + projects_dir: PathBuf, + #[structopt(short)] + branch_name: bool, + }, + #[structopt(name = "compare")] + Compare { + #[structopt(parse(from_os_str))] + #[structopt(short)] + results_dir: PathBuf, + }, +} + +#[derive(Debug, Clone)] +struct Metric<'a> { + name: &'a str, + prepare: &'a str, + cmd: &'a str, +} + +impl Metric<'_> { + fn outfile(&self, project: &str, branch: &str) -> String { + [branch, "_", self.name, "_", project, ".json"].join("") + } +} + +// calls hyperfine via system command, and returns result of runs +pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, io::Error> { + // to add a new metric to the test suite, simply define it in this list: + // TODO read from some config file? + let metrics: Vec = vec![ + Metric { + name: "parse", + prepare: "rm -rf target/", + cmd: "dbt parse --no-version-check", + }, + ]; + + // list out all projects + let project_entries = fs::read_dir(projects_directory).unwrap(); + + project_entries + .map(|entry| { + metrics + .clone() + .into_iter() + .map(|metric| { + let path = entry.as_ref().unwrap().path(); + let project_name = &path.file_name().and_then(|x| x.to_str()).unwrap(); + + Command::new("hyperfine") + .current_dir(&path) + // warms filesystem caches by running the command first without counting it. + // alternatively we could clear them before each run + .arg("--warmup") + .arg("1") + .arg("--prepare") + .arg(metric.prepare) + .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) + .arg("--export-json") + .arg( + ["../../results/", &metric.outfile(project_name, &dbt_branch)].join(""), + ) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. + .arg("--show-output") + .status() // use spawn() here instead for more information + }) + .collect::>>() + }) + .flatten() + .collect() +} + +// exits the program given measurement results +pub fn proper_exit(statuses: Vec) { + for status in statuses { + match status.code() { + None => (), + Some(0) => (), + // if any child command exited with a non zero status code, exit with the same one here. 
+ Some(nonzero) => exit(nonzero), + } + } +} From 868fd64adf878d4bd06de4b07391b025f5470d01 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:08:00 -0400 Subject: [PATCH 28/81] download correct artifact --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 7267503bd24..2ed046c2290 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -146,7 +146,7 @@ jobs: run: ls - uses: actions/download-artifact@v2 with: - name: comparison + name: runner - name: change permissions run: chmod +x ./runner - name: run calculation From 95116dbb5b641faed1faba70821e784e9869cb57 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:12:25 -0400 Subject: [PATCH 29/81] fix measure call --- .github/workflows/performance.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 2ed046c2290..8d5bc864b3d 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -76,7 +76,7 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run - run: ./runner -p ${{ github.workspace }}/performance/projects/ -b dev + run: ./runner measure -b dev -p ${{ github.workspace }}/performance/projects/ - uses: actions/upload-artifact@v2 with: name: dev-results @@ -117,13 +117,13 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner -p ${{ github.workspace }}/performance/projects/ -b latest + run: ./runner measure -b latest -p ${{ github.workspace }}/performance/projects/ - uses: actions/upload-artifact@v2 with: name: latest-results path: performance/results/ - # run the comparison app on the result output generated from measuring + # detect regressions on the output generated from measuring # the two branches. Exits with non-zero code if a regression is detected. calculate-regressions: needs: [measure-dev, measure-latest] From f1f99a23717184ee9617fdd9edd467f97668674a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:15:44 -0400 Subject: [PATCH 30/81] add name to action --- .github/workflows/performance.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 8d5bc864b3d..cb994c7f53d 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -1,4 +1,5 @@ +name: Performance Regression Testing # Schedule triggers on: # TODO this is just while developing From 03332b2955e1eafd1486ed2bdc5b83b646fe4f8d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:32:56 -0400 Subject: [PATCH 31/81] more gracefully exit than unwrapping --- performance/runner/src/calculate.rs | 4 ++-- performance/runner/src/main.rs | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index c2e0d592b4b..2ac693652e3 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -160,8 +160,8 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE }) } -pub fn exit_properly(regressions: &[Regression]) { - match regressions { +pub fn exit_properly(regressions: Vec) { + match regressions[..] { [] => println!("congrats! 
no regressions"), _ => { for r in regressions { diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index d38d823870c..bd4eff6f832 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -3,7 +3,9 @@ extern crate structopt; mod calculate; mod measure; +use std::fmt::Display; use std::path::PathBuf; +use std::process::exit; use structopt::StructOpt; #[derive(Clone, Debug, StructOpt)] @@ -25,13 +27,29 @@ enum Opt { }, } +fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> B { + match r { + Err(e) => { + println!("{}", e); + exit(1) + }, + Ok(x) => f(x), + } +} + fn main() { match Opt::from_args() { Opt::Measure { projects_dir, branch_name } => { - measure::proper_exit(measure::measure(&projects_dir, &branch_name).unwrap()) + gracefully_exit_or( + &measure::proper_exit, + measure::measure(&projects_dir, &branch_name) + ) }, Opt::Calculate { results_dir } => { - calculate::exit_properly(&calculate::regressions(&results_dir).unwrap()) + gracefully_exit_or( + &calculate::exit_properly, + calculate::regressions(&results_dir) + ) }, } } From b02875a12b01fb91fcea46f3276200196c8dc817 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:35:32 -0400 Subject: [PATCH 32/81] add debug line --- performance/runner/src/calculate.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 2ac693652e3..003e173d790 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -151,6 +151,7 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE .collect(); println!("checking regressions with the following measurements:"); + println!("length: {}", &v_next.len()); for (path, _) in &v_next { // TODO this printed nothing. What's the size of this vector? println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) From c182c05c2fa96ff9e8e19f77bc30c83b06d3e1c9 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:48:21 -0400 Subject: [PATCH 33/81] error when there are no results to process --- performance/runner/src/calculate.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 003e173d790..b4e7027a93c 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -51,6 +51,8 @@ pub enum TestError { FilenameNotUnicodeErr(PathBuf), #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadFileContentsErr(PathBuf, Option), + #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] + NoResultsErr(PathBuf), } fn regression(baseline: &Measurement, latest: &Measurement) -> Option { @@ -144,6 +146,12 @@ fn calculate_regressions( pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { + // exit early with an Err if there are no results to process + match v[..] 
{ + [] => Err(TestError::NoResultsErr(results_directory.clone())), + _ => Ok(()) + }?; + let v_next: Vec<(PathBuf, Measurement)> = v .into_iter() // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` @@ -151,7 +159,6 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE .collect(); println!("checking regressions with the following measurements:"); - println!("length: {}", &v_next.len()); for (path, _) in &v_next { // TODO this printed nothing. What's the size of this vector? println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) @@ -209,6 +216,11 @@ Filepath: dummy/path/no_file/"# Filepath: dummy/path/filenotexist.json Originating Exception:None"# ), + ( + TestError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"NoResultsErr: The results directory has no json files in it. +Filepath: dummy/path/no_file/"# + ), ]; for (err, msg) in pairs { From 37e86257f55a57798d920cfc877e5251614e70cc Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 12:57:19 -0400 Subject: [PATCH 34/81] point to the downloaded artifacts correctly --- .github/workflows/performance.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index cb994c7f53d..f57683370e7 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -101,7 +101,7 @@ jobs: python-version: '3.8' - name: move repo up a level run: mkdir ${{ github.workspace }}/../latest/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../latest - - name: debug line + - name: "[debug] ls new dbt location" run: ls ${{ github.workspace }}/../latest/dbt/ # installation creates egg-links so we have to preserve source - name: install dbt from new location @@ -132,18 +132,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - uses: actions/download-artifact@v2 with: name: dev-results - uses: actions/download-artifact@v2 with: name: latest-results - - name: ls result files + - name: "[debug] ls result files" run: ls - uses: actions/download-artifact@v2 with: @@ -151,4 +146,4 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run calculation - run: ./runner calculate -r performance/results/ + run: ./runner calculate -r ./ From 80244a09fe41189a5361a70504668656c8421ee8 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 14:00:30 -0400 Subject: [PATCH 35/81] add error for groups that are not two elements large --- performance/runner/src/calculate.rs | 47 ++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index b4e7027a93c..14c65e4dae9 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -33,7 +33,7 @@ pub struct Regression { } #[derive(Debug, Clone, PartialEq)] -struct MeasurementGroup { +pub struct MeasurementGroup { version: String, run: String, measurement: Measurement, @@ -53,6 +53,10 @@ pub enum TestError { BadFileContentsErr(PathBuf, Option), #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] NoResultsErr(PathBuf), + #[error("OddResultsCountErr: The results directory has an odd number of results in it. 
Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())] + OddResultsCountErr(usize, PathBuf), + #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] + BadGroupSizeErr(usize, Vec), } fn regression(baseline: &Measurement, latest: &Measurement) -> Option { @@ -88,7 +92,6 @@ fn measurements_from_files( .map_or(false, |ext| ext.ends_with("json")) }) .map(|filename| { - println!("{:?}", filename); fs::read_to_string(&filename) .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), Some(e)))) .and_then(|contents| { @@ -136,10 +139,13 @@ fn calculate_regressions( .into_iter() .map(|(_, group)| { let g: Vec = group.collect(); - regression(&g[0].measurement, &g[1].measurement) + match g.len() { + 2 => regression(&g[0].measurement, &g[1].measurement).map(Ok), + i => Some(Err(TestError::BadGroupSizeErr(i, g))), + } }) .flatten() - .collect(); + .collect::, TestError>>()?; Ok(x) } @@ -147,9 +153,10 @@ fn calculate_regressions( pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process - match v[..] { - [] => Err(TestError::NoResultsErr(results_directory.clone())), - _ => Ok(()) + match v.len() { + 0 => Err(TestError::NoResultsErr(results_directory.clone())), + i if i % 2 == 1 => Err(TestError::OddResultsCountErr(i, results_directory.clone())), + _ => Ok(()), }?; let v_next: Vec<(PathBuf, Measurement)> = v @@ -221,6 +228,32 @@ Originating Exception:None"# r#"NoResultsErr: The results directory has no json files in it. Filepath: dummy/path/no_file/"# ), + ( + TestError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), + r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. +File Count: 3 +Filepath: dummy/path/no_file/"# + ), + ( + TestError::BadGroupSizeErr(1, vec![MeasurementGroup { + version: "dev".to_owned(), + run: "some command".to_owned(), + measurement: Measurement { + command: "some command".to_owned(), + mean: 1.0, + stddev: 1.0, + median: 1.0, + user: 1.0, + system: 1.0, + min: 1.0, + max: 1.0, + times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], + } + }]), + r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. 
+Count: 1 +Group: [("dev", "some command")]"# + ), ]; for (err, msg) in pairs { From faff8c00b3880eb3ef79424fcfa949747c14fdad Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 14:37:49 -0400 Subject: [PATCH 36/81] group by run only --- performance/runner/src/calculate.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 14c65e4dae9..f755b0c3f1d 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -121,7 +121,7 @@ fn calculate_regressions( let parts: Vec<&str> = name.split("_").collect(); MeasurementGroup { version: parts[0].to_owned(), - run: parts[1..].join(""), + run: parts[1..].join("_"), measurement: m.clone(), } }) @@ -135,7 +135,7 @@ fn calculate_regressions( let x: Vec = sorted_measurement_groups .into_iter() - .group_by(|x| (x.run.clone(), x.version.clone())) + .group_by(|x| x.run.clone()) .into_iter() .map(|(_, group)| { let g: Vec = group.collect(); From 757614d57f8769d7fd9e3c0f9ba2537236b28433 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 14:59:21 -0400 Subject: [PATCH 37/81] add stddev regression --- performance/runner/src/calculate.rs | 69 ++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index f755b0c3f1d..d0cfc378903 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -25,11 +25,19 @@ struct Measurements { } #[derive(Debug, Clone, PartialEq)] -pub struct Regression { - threshold: f64, - difference: f64, - baseline: Measurement, - latest: Measurement, +pub enum Regression { + Median { + threshold: f64, + difference: f64, + baseline: f64, + latest: f64, + }, + StandardDeviation { + threshold: f64, + difference: f64, + baseline: f64, + latest: f64, + }, } #[derive(Debug, Clone, PartialEq)] @@ -59,19 +67,38 @@ pub enum TestError { BadGroupSizeErr(usize, Vec), } -fn regression(baseline: &Measurement, latest: &Measurement) -> Option { - let threshold = 1.05; // 5% regression threshold - let difference = latest.median / baseline.median; - if difference > threshold { - Some(Regression { - threshold, - difference, - baseline: baseline.clone(), - latest: latest.clone(), - }) - } else { - None - } +fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { + let median_threshold = 1.05; // 5% regression threshold + let median_difference = latest.median / baseline.median; + + let stddev_threshold = 1.05; // 5% regression threshold + let stddev_difference = latest.stddev / baseline.stddev; + + let mut regressions = vec![]; + + if median_difference > median_threshold { + regressions.push( + Regression::Median { + threshold: median_threshold, + difference: median_difference, + baseline: baseline.median.clone(), + latest: latest.median.clone(), + } + ) + }; + + if stddev_difference > stddev_threshold { + regressions.push( + Regression::StandardDeviation { + threshold: stddev_threshold, + difference: stddev_difference, + baseline: baseline.stddev.clone(), + latest: latest.stddev.clone(), + } + ) + }; + + regressions } // given a directory, read all files in the directory and return each @@ -140,8 +167,8 @@ fn calculate_regressions( .map(|(_, group)| { let g: Vec = group.collect(); match g.len() { - 2 => regression(&g[0].measurement, &g[1].measurement).map(Ok), - i => Some(Err(TestError::BadGroupSizeErr(i, g))), + 2 => regression(&g[0].measurement, 
&g[1].measurement).into_iter().map(Ok).collect(), + i => vec![Err(TestError::BadGroupSizeErr(i, g))], } }) .flatten() @@ -180,7 +207,7 @@ pub fn exit_properly(regressions: Vec) { [] => println!("congrats! no regressions"), _ => { for r in regressions { - println!("{:?}", r); + println!("{:#?}", r); } println!("the above regressions were found."); exit(1) From ed91ded2c1dc140163c3291797e973db0f596aee Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 15:04:05 -0400 Subject: [PATCH 38/81] branches named dev and baseline --- .github/workflows/performance.yml | 18 +++++++++--------- performance/runner/src/calculate.rs | 14 +++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index f57683370e7..69a27e65af3 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -86,9 +86,9 @@ jobs: # run the performance measurements on the latest release branch # this part takes by far the longest, so we do everything we can first # so the job fails fast. - measure-latest: + measure-baseline: needs: [build-runner] - name: Measure Latest Branch + name: Measure Baseline Branch runs-on: ubuntu-latest steps: - name: checkout latest @@ -100,12 +100,12 @@ jobs: with: python-version: '3.8' - name: move repo up a level - run: mkdir ${{ github.workspace }}/../latest/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../latest + run: mkdir ${{ github.workspace }}/../baseline/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../baseline - name: "[debug] ls new dbt location" - run: ls ${{ github.workspace }}/../latest/dbt/ + run: ls ${{ github.workspace }}/../baseline/dbt/ # installation creates egg-links so we have to preserve source - name: install dbt from new location - run: cd ${{ github.workspace }}/../latest/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt + run: cd ${{ github.workspace }}/../baseline/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt # checkout the current branch to get all the target projects # this deletes the old checked out code which is why we had to copy before - name: checkout dev @@ -118,16 +118,16 @@ jobs: - name: change permissions run: chmod +x ./runner - name: run runner - run: ./runner measure -b latest -p ${{ github.workspace }}/performance/projects/ + run: ./runner measure -b baseline -p ${{ github.workspace }}/performance/projects/ - uses: actions/upload-artifact@v2 with: - name: latest-results + name: baseline-results path: performance/results/ # detect regressions on the output generated from measuring # the two branches. Exits with non-zero code if a regression is detected. 
calculate-regressions: - needs: [measure-dev, measure-latest] + needs: [measure-dev, measure-baseline] name: Compare Results runs-on: ubuntu-latest steps: @@ -137,7 +137,7 @@ jobs: name: dev-results - uses: actions/download-artifact@v2 with: - name: latest-results + name: baseline-results - name: "[debug] ls result files" run: ls - uses: actions/download-artifact@v2 diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index d0cfc378903..59315c26ad8 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -30,13 +30,13 @@ pub enum Regression { threshold: f64, difference: f64, baseline: f64, - latest: f64, + dev: f64, }, StandardDeviation { threshold: f64, difference: f64, baseline: f64, - latest: f64, + dev: f64, }, } @@ -67,12 +67,12 @@ pub enum TestError { BadGroupSizeErr(usize, Vec), } -fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { +fn regression(baseline: &Measurement, dev: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold - let median_difference = latest.median / baseline.median; + let median_difference = dev.median / baseline.median; let stddev_threshold = 1.05; // 5% regression threshold - let stddev_difference = latest.stddev / baseline.stddev; + let stddev_difference = dev.stddev / baseline.stddev; let mut regressions = vec![]; @@ -82,7 +82,7 @@ fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { threshold: median_threshold, difference: median_difference, baseline: baseline.median.clone(), - latest: latest.median.clone(), + dev: dev.median.clone(), } ) }; @@ -93,7 +93,7 @@ fn regression(baseline: &Measurement, latest: &Measurement) -> Vec { threshold: stddev_threshold, difference: stddev_difference, baseline: baseline.stddev.clone(), - latest: latest.stddev.clone(), + dev: dev.stddev.clone(), } ) }; From fe24dd43d4dda8a5fa4d05ada918f8b432f1bc9a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:10:16 -0400 Subject: [PATCH 39/81] return all calculations not just regressions --- performance/runner/src/calculate.rs | 68 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 59315c26ad8..2a10d88a4c4 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -25,19 +25,18 @@ struct Measurements { } #[derive(Debug, Clone, PartialEq)] -pub enum Regression { - Median { - threshold: f64, - difference: f64, - baseline: f64, - dev: f64, - }, - StandardDeviation { - threshold: f64, - difference: f64, - baseline: f64, - dev: f64, - }, +struct Data { + threshold: f64, + difference: f64, + baseline: f64, + dev: f64, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Calculation { + metric: String, + regression: bool, + data: Data, } #[derive(Debug, Clone, PartialEq)] @@ -67,38 +66,35 @@ pub enum TestError { BadGroupSizeErr(usize, Vec), } -fn regression(baseline: &Measurement, dev: &Measurement) -> Vec { +fn calculate(metric: &str, baseline: &Measurement, dev: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; let stddev_threshold = 1.05; // 5% regression threshold let stddev_difference = dev.stddev / baseline.stddev; - let mut regressions = vec![]; - - if median_difference > median_threshold { - regressions.push( - Regression::Median { + vec![ + Calculation { + metric: ["median", metric].join("_"), + 
regression: median_difference > median_threshold, + data: Data { threshold: median_threshold, difference: median_difference, baseline: baseline.median.clone(), dev: dev.median.clone(), } - ) - }; - - if stddev_difference > stddev_threshold { - regressions.push( - Regression::StandardDeviation { + }, + Calculation { + metric: ["stddev", metric].join("_"), + regression: stddev_difference > stddev_threshold, + data: Data { threshold: stddev_threshold, difference: stddev_difference, baseline: baseline.stddev.clone(), dev: dev.stddev.clone(), } - ) - }; - - regressions + }, + ] } // given a directory, read all files in the directory and return each @@ -134,7 +130,7 @@ fn measurements_from_files( // measurements together by filename fn calculate_regressions( measurements: &[(PathBuf, Measurement)], -) -> Result, TestError> { +) -> Result, TestError> { let mut measurement_groups: Vec = measurements .into_iter() .map(|(p, m)| { @@ -160,24 +156,24 @@ fn calculate_regressions( // locking up mutation let sorted_measurement_groups = measurement_groups; - let x: Vec = sorted_measurement_groups + let x: Vec = sorted_measurement_groups .into_iter() .group_by(|x| x.run.clone()) .into_iter() .map(|(_, group)| { let g: Vec = group.collect(); match g.len() { - 2 => regression(&g[0].measurement, &g[1].measurement).into_iter().map(Ok).collect(), + 2 => calculate(&g[0].run, &g[0].measurement, &g[1].measurement).into_iter().map(Ok).collect(), i => vec![Err(TestError::BadGroupSizeErr(i, g))], } }) .flatten() - .collect::, TestError>>()?; + .collect::, TestError>>()?; Ok(x) } -pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { +pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process match v.len() { @@ -202,13 +198,15 @@ pub fn regressions(results_directory: &PathBuf) -> Result, TestE }) } -pub fn exit_properly(regressions: Vec) { +pub fn exit_properly(calculations: Vec) { + let regressions = calculations.into_iter().filter(|c| c.regression).collect::>(); match regressions[..] { [] => println!("congrats! 
no regressions"), _ => { for r in regressions { println!("{:#?}", r); } + println!(""); println!("the above regressions were found."); exit(1) } From 873e9714f8f8d447be009e6a75c22189ed38f198 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:22:02 -0400 Subject: [PATCH 40/81] move io operations to main --- performance/runner/src/calculate.rs | 67 ++++++++++------------------- performance/runner/src/main.rs | 32 ++++++++++++-- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 2a10d88a4c4..655a4498609 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -7,43 +7,43 @@ use std::borrow::Cow; use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -struct Measurement { - command: String, - mean: f64, - stddev: f64, - median: f64, - user: f64, - system: f64, - min: f64, - max: f64, - times: Vec, +pub struct Measurement { + pub command: String, + pub mean: f64, + pub stddev: f64, + pub median: f64, + pub user: f64, + pub system: f64, + pub min: f64, + pub max: f64, + pub times: Vec, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -struct Measurements { - results: Vec, +pub struct Measurements { + pub results: Vec, } #[derive(Debug, Clone, PartialEq)] -struct Data { - threshold: f64, - difference: f64, - baseline: f64, - dev: f64, +pub struct Data { + pub threshold: f64, + pub difference: f64, + pub baseline: f64, + pub dev: f64, } #[derive(Debug, Clone, PartialEq)] pub struct Calculation { - metric: String, - regression: bool, - data: Data, + pub metric: String, + pub regression: bool, + pub data: Data, } #[derive(Debug, Clone, PartialEq)] pub struct MeasurementGroup { - version: String, - run: String, - measurement: Measurement, + pub version: String, + pub run: String, + pub measurement: Measurement, } #[derive(Error, Debug)] @@ -188,31 +188,10 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Test .map(|(p, ms)| (p, ms.results[0].clone())) .collect(); - println!("checking regressions with the following measurements:"); - for (path, _) in &v_next { - // TODO this printed nothing. What's the size of this vector? - println!("{}", path.file_name().map(|x| x.to_string_lossy()).unwrap_or(Cow::from("unknown file"))) - } - calculate_regressions(&v_next) }) } -pub fn exit_properly(calculations: Vec) { - let regressions = calculations.into_iter().filter(|c| c.regression).collect::>(); - match regressions[..] { - [] => println!("congrats! no regressions"), - _ => { - for r in regressions { - println!("{:#?}", r); - } - println!(""); - println!("the above regressions were found."); - exit(1) - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index bd4eff6f832..bdbd48e8951 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -3,6 +3,7 @@ extern crate structopt; mod calculate; mod measure; +use crate::calculate::Calculation; use std::fmt::Display; use std::path::PathBuf; use std::process::exit; @@ -37,6 +38,8 @@ fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> } } +// This is where all the printing, exiting, and error displaying +// should happen. Module functions should only return values. 
fn main() { match Opt::from_args() { Opt::Measure { projects_dir, branch_name } => { @@ -46,10 +49,33 @@ fn main() { ) }, Opt::Calculate { results_dir } => { - gracefully_exit_or( - &calculate::exit_properly, + let calculations = gracefully_exit_or( + &|x| x, calculate::regressions(&results_dir) - ) + ); + + println!(":: All Calculations ::\n"); + for c in &calculations { + println!("{:#?}", c); + } + println!(""); + + let regressions: Vec = calculations + .into_iter() + .filter(|c| c.regression) + .collect(); + + match regressions[..] { + [] => println!("congrats! no regressions :)"), + _ => { + println!(":: Regressions Found ::\n"); + for r in regressions { + println!("{:#?}", r); + } + println!(""); + exit(1) + } + } }, } } From 3004969a933275ca8e2b09157a9cc52a0285c68a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:26:29 -0400 Subject: [PATCH 41/81] move measure io to main --- performance/runner/src/calculate.rs | 2 -- performance/runner/src/main.rs | 21 ++++++++++++++++++--- performance/runner/src/measure.rs | 14 +------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 655a4498609..827ae2f6228 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -1,9 +1,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use std::process::exit; use std::{fs, io}; -use std::borrow::Cow; use thiserror::Error; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index bdbd48e8951..8980bf23ea3 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -42,12 +42,26 @@ fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> // should happen. Module functions should only return values. fn main() { match Opt::from_args() { + Opt::Measure { projects_dir, branch_name } => { - gracefully_exit_or( - &measure::proper_exit, + let statuses = gracefully_exit_or( + &|x| x, measure::measure(&projects_dir, &branch_name) - ) + ); + + for status in statuses { + match status.code() { + None => (), + Some(0) => (), + // if any child command exited with a non zero status code, exit with the same one here. + Some(nonzero) => { + println!("a child process exited with a nonzero status code."); + exit(nonzero) + }, + } + } }, + Opt::Calculate { results_dir } => { let calculations = gracefully_exit_or( &|x| x, @@ -77,5 +91,6 @@ fn main() { } } }, + } } diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 4bf8cf20f91..91222aa5f1a 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,7 +1,7 @@ extern crate structopt; use std::path::PathBuf; -use std::process::{exit, Command, ExitStatus}; +use std::process::{Command, ExitStatus}; use std::{fs, io}; use structopt::StructOpt; @@ -83,15 +83,3 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result) { - for status in statuses { - match status.code() { - None => (), - Some(0) => (), - // if any child command exited with a non zero status code, exit with the same one here. 
- Some(nonzero) => exit(nonzero), - } - } -} From 4e020c387802b3037acf4797f762ce32bafaaaa4 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:50:08 -0400 Subject: [PATCH 42/81] enforce branch names at calculation time --- performance/runner/src/calculate.rs | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 827ae2f6228..42dbbf0ade4 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -62,9 +62,11 @@ pub enum TestError { OddResultsCountErr(usize, PathBuf), #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] BadGroupSizeErr(usize, Vec), + #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)] + BadBranchNameErr(String, String), } -fn calculate(metric: &str, baseline: &Measurement, dev: &Measurement) -> Vec { +fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; @@ -159,9 +161,20 @@ fn calculate_regressions( .group_by(|x| x.run.clone()) .into_iter() .map(|(_, group)| { - let g: Vec = group.collect(); + let mut g: Vec = group.collect(); + g.sort_by(|x, y| x.version.cmp(&y.version)); + match g.len() { - 2 => calculate(&g[0].run, &g[0].measurement, &g[1].measurement).into_iter().map(Ok).collect(), + 2 => { + let dev = &g[0]; + let baseline = &g[1]; + + if dev.version == "dev" && baseline.version == "baseline" { + calculate(&dev.run, &dev.measurement, &baseline.measurement).into_iter().map(Ok).collect() + } else { + vec![Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone()))] + } + }, i => vec![Err(TestError::BadGroupSizeErr(i, g))], } }) @@ -256,6 +269,14 @@ Filepath: dummy/path/no_file/"# Count: 1 Group: [("dev", "some command")]"# ), + ( + TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()).to_path_buf(), + r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. 
+Found: boop, noop"# + ), + + + ]; for (err, msg) in pairs { From 5891b59790d4c3e9d6d2e36c6a260e2ba9034e0b Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 16:52:28 -0400 Subject: [PATCH 43/81] better variable names --- performance/runner/src/calculate.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 42dbbf0ade4..9d2318128b7 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -156,18 +156,18 @@ fn calculate_regressions( // locking up mutation let sorted_measurement_groups = measurement_groups; - let x: Vec = sorted_measurement_groups + let calculations: Vec = sorted_measurement_groups .into_iter() .group_by(|x| x.run.clone()) .into_iter() - .map(|(_, group)| { - let mut g: Vec = group.collect(); - g.sort_by(|x, y| x.version.cmp(&y.version)); + .map(|(_, g)| { + let mut groups: Vec = g.collect(); + groups.sort_by(|x, y| x.version.cmp(&y.version)); - match g.len() { + match groups.len() { 2 => { - let dev = &g[0]; - let baseline = &g[1]; + let dev = &groups[0]; + let baseline = &groups[1]; if dev.version == "dev" && baseline.version == "baseline" { calculate(&dev.run, &dev.measurement, &baseline.measurement).into_iter().map(Ok).collect() @@ -175,13 +175,13 @@ fn calculate_regressions( vec![Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone()))] } }, - i => vec![Err(TestError::BadGroupSizeErr(i, g))], + i => vec![Err(TestError::BadGroupSizeErr(i, groups))], } }) .flatten() .collect::, TestError>>()?; - Ok(x) + Ok(calculations) } pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { From 935c1387361014625532b554de1f672660deea1f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:03:37 -0400 Subject: [PATCH 44/81] type gymnastics --- performance/runner/src/calculate.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 9d2318128b7..3f0f147af68 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -166,20 +166,20 @@ fn calculate_regressions( match groups.len() { 2 => { - let dev = &groups[0]; - let baseline = &groups[1]; + let dev = &groups[1]; + let baseline = &groups[0]; if dev.version == "dev" && baseline.version == "baseline" { - calculate(&dev.run, &dev.measurement, &baseline.measurement).into_iter().map(Ok).collect() + Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement)) } else { - vec![Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone()))] + Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) } }, - i => vec![Err(TestError::BadGroupSizeErr(i, groups))], + i => Err(TestError::BadGroupSizeErr(i, groups)), } }) - .flatten() - .collect::, TestError>>()?; + .collect::>, TestError>>()? 
+ .concat(); Ok(calculations) } From 64bf9c8885caf5c77fcf4dee9b1af9b61bab7c51 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:04:13 -0400 Subject: [PATCH 45/81] test fix --- performance/runner/src/calculate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 3f0f147af68..03b2a24b32e 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -270,7 +270,7 @@ Count: 1 Group: [("dev", "some command")]"# ), ( - TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()).to_path_buf(), + TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. Found: boop, noop"# ), From 252280b56e1ab64364e9164ab434408c603d3dc6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:34:10 -0400 Subject: [PATCH 46/81] write out results as artifact --- .github/workflows/performance.yml | 4 ++++ performance/runner/src/calculate.rs | 4 ++-- performance/runner/src/main.rs | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 69a27e65af3..6d94f2954d7 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -147,3 +147,7 @@ jobs: run: chmod +x ./runner - name: run calculation run: ./runner calculate -r ./ + - uses: actions/upload-artifact@v2 + with: + name: final-calculations + path: ./final_calculations.json diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 03b2a24b32e..08a0de961bd 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -22,7 +22,7 @@ pub struct Measurements { pub results: Vec, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Data { pub threshold: f64, pub difference: f64, @@ -30,7 +30,7 @@ pub struct Data { pub dev: f64, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Calculation { pub metric: String, pub regression: bool, diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 8980bf23ea3..61efee5a189 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -5,6 +5,8 @@ mod measure; use crate::calculate::Calculation; use std::fmt::Display; +use std::fs::File; +use std::io::Write; use std::path::PathBuf; use std::process::exit; use structopt::StructOpt; @@ -68,17 +70,30 @@ fn main() { calculate::regressions(&results_dir) ); + // print calculations to stdout println!(":: All Calculations ::\n"); for c in &calculations { - println!("{:#?}", c); + println!("{:#?}\n", c); } println!(""); + // write calculations to file so it can be downloaded as an artifact + let json_calcs = serde_json::to_string(&calculations) + .expect("failed to serialize calculations to json"); + + let outfile = &mut results_dir.into_os_string(); + outfile.push("/final_calculations.json"); + + let mut f = File::create(outfile).expect("Unable to create file"); + f.write_all(json_calcs.as_bytes()).expect("unable to write data"); + + // filter for regressions let regressions: Vec = calculations .into_iter() .filter(|c| c.regression) .collect(); + // exit with non zero exit code if there are regressions match regressions[..] { [] => println!("congrats! 
no regressions :)"), _ => { From bad0198a362bb305b4f6fdd01708b5ab95d130e2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:55:03 -0400 Subject: [PATCH 47/81] minor readme changes --- performance/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/performance/README.md b/performance/README.md index 8b9ce3058ab..aa680b6bae3 100644 --- a/performance/README.md +++ b/performance/README.md @@ -1,13 +1,14 @@ -# Performance Testing +# Performance Regression Testing This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually. -## Adding a new project -Just make a new directory under projects. It will automatically be picked up by the tests. +## Adding a new dbt project +Just make a new directory under `performance/projects/`. It will automatically be picked up by the tests. ## Adding a new dbt command -In `runner/src/main.rs` add a metric to the `metrics` Vec in the main function. The Github Action will handle recompilation. +In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Github Action will handle recompilation if you don't have the rust toolchain installed. ## Future work - add more projects to test different configurations that have been known bottlenecks - add more dbt commands to measure -- consider storing these results so they can be graphed over time +- possibly using the uploaded json artifacts to store these results so they can be graphed over time +- reading new metrics from a file so no one has to edit rust source to add them to the suite From 580b1fdd688e3fe7fa9fc01a459e1f946d8bddae Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:56:30 -0400 Subject: [PATCH 48/81] minor print change --- performance/runner/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 61efee5a189..e3e07b7117a 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -98,8 +98,9 @@ fn main() { [] => println!("congrats! no regressions :)"), _ => { println!(":: Regressions Found ::\n"); + println!(""); for r in regressions { - println!("{:#?}", r); + println!("{:#?}\n", r); } println!(""); exit(1) From 1f189f522556ce54dd2b4caf35606151f3cf6652 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 17:59:53 -0400 Subject: [PATCH 49/81] added small paragraph to readme --- performance/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/performance/README.md b/performance/README.md index aa680b6bae3..341e9da4333 100644 --- a/performance/README.md +++ b/performance/README.md @@ -1,6 +1,8 @@ # Performance Regression Testing This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually. +The github workflow hardcodes our baseline branch for performance metrics as `0.20.latest`. As future versions become faster, this branch will be updated to hold us to those new standards. + ## Adding a new dbt project Just make a new directory under `performance/projects/`. 
It will automatically be picked up by the tests. From 486afa9fcd2728eb8353241a8c43da671f374ac3 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 3 Aug 2021 18:04:22 -0400 Subject: [PATCH 50/81] upload results even when regressions are detected --- .github/workflows/performance.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 6d94f2954d7..01fa3855e5f 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -147,7 +147,9 @@ jobs: run: chmod +x ./runner - name: run calculation run: ./runner calculate -r ./ + # always attempt to upload the results even if there were regressions found - uses: actions/upload-artifact@v2 + if: ${{ always() }} with: name: final-calculations path: ./final_calculations.json From ba9d76b3f9fe641a883fca24dd71a9b48f72abc2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 10:11:09 -0400 Subject: [PATCH 51/81] make final json human readable --- performance/runner/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index e3e07b7117a..5c289bf3a27 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -78,7 +78,7 @@ fn main() { println!(""); // write calculations to file so it can be downloaded as an artifact - let json_calcs = serde_json::to_string(&calculations) + let json_calcs = serde_json::to_string_pretty(&calculations) .expect("failed to serialize calculations to json"); let outfile = &mut results_dir.into_os_string(); From 287c4d2b031814de0e49361cfbd0b39f75de94cf Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 12:15:29 -0400 Subject: [PATCH 52/81] revamp exception hierarchy --- performance/runner/src/calculate.rs | 140 ++++----------------------- performance/runner/src/exceptions.rs | 140 +++++++++++++++++++++++++++ performance/runner/src/main.rs | 1 + performance/runner/src/measure.rs | 29 ++++-- 4 files changed, 179 insertions(+), 131 deletions(-) create mode 100644 performance/runner/src/exceptions.rs diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 08a0de961bd..3f2f5f52cba 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -1,8 +1,8 @@ +use crate::exceptions::{CalculateError, IOError}; use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use std::{fs, io}; -use thiserror::Error; +use std::fs; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurement { @@ -44,28 +44,6 @@ pub struct MeasurementGroup { pub measurement: Measurement, } -#[derive(Error, Debug)] -pub enum TestError { - #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] - BadJSONErr(PathBuf, Option), - #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] - ReadErr(PathBuf, Option), - #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] - MissingFilenameErr(PathBuf), - #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. 
Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] - FilenameNotUnicodeErr(PathBuf), - #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] - BadFileContentsErr(PathBuf, Option), - #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] - NoResultsErr(PathBuf), - #[error("OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())] - OddResultsCountErr(usize, PathBuf), - #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] - BadGroupSizeErr(usize, Vec), - #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)] - BadBranchNameErr(String, String), -} - fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; @@ -101,9 +79,10 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Result, TestError> { +) -> Result, CalculateError> { let result_files = fs::read_dir(results_directory) - .or_else(|e| Err(TestError::ReadErr(results_directory.to_path_buf(), Some(e))))?; + .or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; result_files .into_iter() @@ -116,10 +95,11 @@ fn measurements_from_files( }) .map(|filename| { fs::read_to_string(&filename) - .or_else(|e| Err(TestError::BadFileContentsErr(filename.clone(), Some(e)))) + .or_else(|e| Err(IOError::BadFileContentsErr(filename.clone(), Some(e)))) + .or_else(|e| Err(CalculateError::CalculateIOError(e))) .and_then(|contents| { serde_json::from_str::(&contents) - .or_else(|e| Err(TestError::BadJSONErr(filename.clone(), Some(e)))) + .or_else(|e| Err(CalculateError::BadJSONErr(filename.clone(), Some(e)))) }) .map(|m| (filename, m)) }) @@ -130,15 +110,15 @@ fn measurements_from_files( // measurements together by filename fn calculate_regressions( measurements: &[(PathBuf, Measurement)], -) -> Result, TestError> { +) -> Result, CalculateError> { let mut measurement_groups: Vec = measurements .into_iter() .map(|(p, m)| { p.file_name() - .ok_or_else(|| TestError::MissingFilenameErr(p.to_path_buf())) + .ok_or_else(|| IOError::MissingFilenameErr(p.to_path_buf())) .and_then(|name| { name.to_str() - .ok_or_else(|| TestError::FilenameNotUnicodeErr(p.to_path_buf())) + .ok_or_else(|| IOError::FilenameNotUnicodeErr(p.to_path_buf())) }) .map(|name| { let parts: Vec<&str> = name.split("_").collect(); @@ -149,7 +129,8 @@ fn calculate_regressions( } }) }) - .collect::, TestError>>()?; + .collect::, IOError>>() + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; measurement_groups.sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version))); @@ -172,24 +153,24 @@ fn calculate_regressions( if dev.version == "dev" && baseline.version == "baseline" { Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement)) } else { - Err(TestError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) + Err(CalculateError::BadBranchNameErr(baseline.version.clone(), 
dev.version.clone())) } }, - i => Err(TestError::BadGroupSizeErr(i, groups)), + i => Err(CalculateError::BadGroupSizeErr(i, groups)), } }) - .collect::>, TestError>>()? + .collect::>, CalculateError>>()? .concat(); Ok(calculations) } -pub fn regressions(results_directory: &PathBuf) -> Result, TestError> { +pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process match v.len() { - 0 => Err(TestError::NoResultsErr(results_directory.clone())), - i if i % 2 == 1 => Err(TestError::OddResultsCountErr(i, results_directory.clone())), + 0 => Err(CalculateError::NoResultsErr(results_directory.clone())), + i if i % 2 == 1 => Err(CalculateError::OddResultsCountErr(i, results_directory.clone())), _ => Ok(()), }?; @@ -202,86 +183,3 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Test calculate_regressions(&v_next) }) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_error_messages() { - let pairs = vec![ - ( - TestError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), - r#"BadJSONErr: JSON in file cannot be deserialized as expected. -Filepath: dummy/path/file.json -Originating Exception:None"# - ), - ( - TestError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), - r#"ReadErr: The file cannot be read. -Filepath: dummy/path/file.json -Originating Exception:None"# - ), - ( - TestError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), - r#"MissingFilenameErr: The path provided does not specify a file. -Filepath: dummy/path/no_file/"# - ), - ( - TestError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), - r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. -Filepath: dummy/path/no_file/"# - ), - ( - TestError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), - r#"BadFileContentsErr: Check that the file exists and is readable. -Filepath: dummy/path/filenotexist.json -Originating Exception:None"# - ), - ( - TestError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), - r#"NoResultsErr: The results directory has no json files in it. -Filepath: dummy/path/no_file/"# - ), - ( - TestError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), - r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. -File Count: 3 -Filepath: dummy/path/no_file/"# - ), - ( - TestError::BadGroupSizeErr(1, vec![MeasurementGroup { - version: "dev".to_owned(), - run: "some command".to_owned(), - measurement: Measurement { - command: "some command".to_owned(), - mean: 1.0, - stddev: 1.0, - median: 1.0, - user: 1.0, - system: 1.0, - min: 1.0, - max: 1.0, - times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], - } - }]), - r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. -Count: 1 -Group: [("dev", "some command")]"# - ), - ( - TestError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), - r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. 
-Found: boop, noop"# - ), - - - - ]; - - for (err, msg) in pairs { - assert_eq!(format!("{}", err), msg) - } - } - -} diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs new file mode 100644 index 00000000000..6166a59dd50 --- /dev/null +++ b/performance/runner/src/exceptions.rs @@ -0,0 +1,140 @@ +use crate::calculate::*; +use std::path::PathBuf; +use std::io; +use std::path::Path; +use thiserror::Error; + + +#[derive(Debug, Error)] +pub enum IOError { + #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + ReadErr(PathBuf, Option), + #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + MissingFilenameErr(PathBuf), + #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] + FilenameNotUnicodeErr(PathBuf), + #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadFileContentsErr(PathBuf, Option), + #[error("CommandErr: System command failed to run.\nOriginating Exception: {}", .0.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + CommandErr(Option), +} + + +#[derive(Debug, Error)] +pub enum CalculateError { + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + BadJSONErr(PathBuf, Option), + #[error("{}", .0)] + CalculateIOError(IOError), + #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())] + NoResultsErr(PathBuf), + #[error("OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())] + OddResultsCountErr(usize, PathBuf), + #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::>())] + BadGroupSizeErr(usize, Vec), + #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)] + BadBranchNameErr(String, String), +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_io_error_messages() { + let pairs = vec![ + ( + IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"ReadErr: The file cannot be read. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"MissingFilenameErr: The path provided does not specify a file. +Filepath: dummy/path/no_file/"# + ), + ( + IOError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. +Filepath: dummy/path/no_file/"# + ), + ( + IOError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), + r#"BadFileContentsErr: Check that the file exists and is readable. 
+Filepath: dummy/path/filenotexist.json +Originating Exception:None"# + ), + ( + IOError::CommandErr(None), + r#"CommandErr: System command failed to run. +Originating Exception: None"# + ), + ]; + + for (err, msg) in pairs { + assert_eq!(format!("{}", err), msg) + } + } + + + #[test] + fn test_calculate_error_messages() { + let pairs = vec![ + ( + CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"BadJSONErr: JSON in file cannot be deserialized as expected. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), + r#"BadJSONErr: JSON in file cannot be deserialized as expected. +Filepath: dummy/path/file.json +Originating Exception:None"# + ), + ( + CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), + r#"NoResultsErr: The results directory has no json files in it. +Filepath: dummy/path/no_file/"# + ), + ( + CalculateError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), + r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. +File Count: 3 +Filepath: dummy/path/no_file/"# + ), + ( + CalculateError::BadGroupSizeErr(1, vec![MeasurementGroup { + version: "dev".to_owned(), + run: "some command".to_owned(), + measurement: Measurement { + command: "some command".to_owned(), + mean: 1.0, + stddev: 1.0, + median: 1.0, + user: 1.0, + system: 1.0, + min: 1.0, + max: 1.0, + times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], + } + }]), + r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. +Count: 1 +Group: [("dev", "some command")]"# + ), + ( + CalculateError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), + r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. +Found: boop, noop"# + ), + ]; + + for (err, msg) in pairs { + assert_eq!(format!("{}", err), msg) + } + } + +} diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 5c289bf3a27..1f9482cb7dd 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -1,6 +1,7 @@ extern crate structopt; mod calculate; +mod exceptions; mod measure; use crate::calculate::Calculation; diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 91222aa5f1a..c15d30f1ae5 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,8 +1,9 @@ extern crate structopt; +use crate::exceptions::IOError; use std::path::PathBuf; use std::process::{Command, ExitStatus}; -use std::{fs, io}; +use std::fs; use structopt::StructOpt; #[derive(Clone, Debug, StructOpt)] @@ -38,7 +39,7 @@ impl Metric<'_> { } // calls hyperfine via system command, and returns result of runs -pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, io::Error> { +pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { // to add a new metric to the test suite, simply define it in this list: // TODO read from some config file? 
let metrics: Vec = vec![ @@ -49,18 +50,25 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result Result>>() + .collect::>>() }) .flatten() .collect() From 2f9907b07291c2814298529d918f3c0ea1247ee7 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 12:34:57 -0400 Subject: [PATCH 53/81] remove one more unwrap --- performance/runner/src/calculate.rs | 24 ++++++++++++++++-------- performance/runner/src/exceptions.rs | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 3f2f5f52cba..7fdf7135564 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -3,6 +3,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::fs; +use std::fs::DirEntry; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurement { @@ -86,22 +87,29 @@ fn measurements_from_files( result_files .into_iter() - .map(|f| f.unwrap().path()) - .filter(|filename| { - filename + .map(|entry| { + let ent: DirEntry = entry.or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; + + Ok(ent.path()) + }) + .collect::, CalculateError>>()? + .into_iter() + .filter(|path| { + path .extension() .and_then(|ext| ext.to_str()) .map_or(false, |ext| ext.ends_with("json")) }) - .map(|filename| { - fs::read_to_string(&filename) - .or_else(|e| Err(IOError::BadFileContentsErr(filename.clone(), Some(e)))) + .map(|path| { + fs::read_to_string(&path) + .or_else(|e| Err(IOError::BadFileContentsErr(path.clone(), Some(e)))) .or_else(|e| Err(CalculateError::CalculateIOError(e))) .and_then(|contents| { serde_json::from_str::(&contents) - .or_else(|e| Err(CalculateError::BadJSONErr(filename.clone(), Some(e)))) + .or_else(|e| Err(CalculateError::BadJSONErr(path.clone(), Some(e)))) }) - .map(|m| (filename, m)) + .map(|m| (path, m)) }) .collect() } diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 6166a59dd50..66433db5c6d 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -1,6 +1,7 @@ use crate::calculate::*; use std::path::PathBuf; use std::io; +#[cfg(test)] use std::path::Path; use thiserror::Error; From 3c8daacd3ed3f2ae8f3fb3cd5062fea633bb478c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 13:12:01 -0400 Subject: [PATCH 54/81] refactor for simpler flow --- performance/runner/src/measure.rs | 76 +++++++++++++++++-------------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index c15d30f1ae5..45ac0700163 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -49,46 +49,52 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result>(); - let path: PathBuf = ent.path(); - let project_name: &str = path - .file_name() - .ok_or_else(|| IOError::MissingFilenameErr(path.clone().to_path_buf())) - .and_then(|x| x.to_str().ok_or_else(|| IOError::FilenameNotUnicodeErr(path.clone().to_path_buf())))?; - - Command::new("hyperfine") - .current_dir(&path) - // warms filesystem caches by running the command first without counting it. 
- // alternatively we could clear them before each run - .arg("--warmup") - .arg("1") - .arg("--prepare") - .arg(metric.prepare) - .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) - .arg("--export-json") - .arg( - ["../../results/", &metric.outfile(&project_name, &dbt_branch)].join(""), - ) - // this prevents capture dbt output. Noisy, but good for debugging when tests fail. - .arg("--show-output") - .status() // use spawn() here instead for more information - .or_else(|e| Err(IOError::CommandErr(Some(e)))) - }) - .collect::>>() + Ok(pairs) }) - .flatten() - .collect() + .collect::>, IOError>>()? + .concat() + .into_iter() + // run hyperfine on each pairing + .map(|(path, project_name, metric)| { + Command::new("hyperfine") + .current_dir(&path) + // warms filesystem caches by running the command first without counting it. + // alternatively we could clear them before each run + .arg("--warmup") + .arg("1") + .arg("--prepare") + .arg(metric.prepare) + .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join("")) + .arg("--export-json") + .arg( + ["../../results/", &metric.outfile(&project_name, &dbt_branch)].join(""), + ) + // this prevents capture dbt output. Noisy, but good for debugging when tests fail. + .arg("--show-output") + .status() // use spawn() here instead for more information + .or_else(|e| Err(IOError::CommandErr(Some(e)))) + } + ) + .collect() } From 94c6cf1b3c48452ad97605aa16f281176cc1cc5c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 13:19:39 -0400 Subject: [PATCH 55/81] remove some clones --- performance/runner/src/calculate.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 7fdf7135564..adb09292235 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -59,8 +59,8 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Vec Date: Wed, 4 Aug 2021 13:31:48 -0400 Subject: [PATCH 56/81] minor refactor --- performance/runner/src/calculate.rs | 35 +++++++++++++++++------------ 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index adb09292235..e297aa96157 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -174,20 +174,27 @@ fn calculate_regressions( } pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { - measurements_from_files(Path::new(&results_directory)).and_then(|v| { + measurements_from_files(Path::new(&results_directory)).and_then(|v| match v[..] { // exit early with an Err if there are no results to process - match v.len() { - 0 => Err(CalculateError::NoResultsErr(results_directory.clone())), - i if i % 2 == 1 => Err(CalculateError::OddResultsCountErr(i, results_directory.clone())), - _ => Ok(()), - }?; - - let v_next: Vec<(PathBuf, Measurement)> = v - .into_iter() - // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` - .map(|(p, ms)| (p, ms.results[0].clone())) - .collect(); - - calculate_regressions(&v_next) + [] => { + Err(CalculateError::NoResultsErr(results_directory.clone())) + }, + + _ => { + // we expect two runs for each project-metric pairing: one for each branch, baseline + // and dev. An odd result count is unexpected. 
+            if v.len() % 2 == 1 {
+                Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone()))
+            } else {
+                // otherwise, we can do our comparisons
+                let measurements = v
+                    .into_iter()
+                    // the way we're running these, the files will each contain exactly one measurement, hence `results[0]`
+                    .map(|(p, ms)| (p, ms.results[0].clone()))
+                    .collect::<Vec<(PathBuf, Measurement)>>();
+
+                calculate_regressions(&measurements)
+            }
+        },
     })
 }

From b77eff8f6fcd026e40564e69de7cc2ae3a69858d Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Wed, 4 Aug 2021 13:34:46 -0400
Subject: [PATCH 57/81] turn warnings into errors in ci

---
 .github/workflows/performance.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index 01fa3855e5f..adb3528c03b 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -20,6 +20,9 @@ jobs:
   test-runner:
     name: Test Runner
     runs-on: ubuntu-latest
+    env:
+      # turns warnings into errors
+      RUSTFLAGS: "-D warnings"
     steps:
       - uses: actions/checkout@v2
       - uses: actions-rs/toolchain@v1

From 0b4689f311703b2bd7b4a41bb23c245f8d77373e Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Wed, 4 Aug 2021 15:14:37 -0400
Subject: [PATCH 58/81] address PR feedback

---
 .github/workflows/performance.yml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
index adb3528c03b..94d111f5b9e 100644
--- a/.github/workflows/performance.yml
+++ b/.github/workflows/performance.yml
@@ -86,9 +86,14 @@ jobs:
           name: dev-results
           path: performance/results/
 
-  # run the performance measurements on the latest release branch
-  # this part takes by far the longest, so we do everything we can first
-  # so the job fails fast.
+  # run the performance measurements on the release branch which we use
+  # as a performance baseline. This part takes by far the longest, so
+  # we do everything we can first so the job fails fast.
+  # -----
+  # we need to checkout dbt twice in this job: once for the baseline dbt
+  # version, and once to get the latest regression testing projects,
+  # metrics, and runner code from the develop or current branch so that
+  # the calculations match for both versions of dbt we are comparing.
measure-baseline: needs: [build-runner] name: Measure Baseline Branch @@ -134,7 +139,6 @@ jobs: name: Compare Results runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - uses: actions/download-artifact@v2 with: name: dev-results From 4b1c6b51f9f2975b5a09ae01f16644cac1cefc2f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 16:39:53 -0400 Subject: [PATCH 59/81] fix spacing --- performance/runner/src/exceptions.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 66433db5c6d..59062c3e3be 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -8,13 +8,13 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum IOError { - #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] ReadErr(PathBuf, Option), #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())] MissingFilenameErr(PathBuf), #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())] FilenameNotUnicodeErr(PathBuf), - #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadFileContentsErr(PathBuf, Option), #[error("CommandErr: System command failed to run.\nOriginating Exception: {}", .0.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] CommandErr(Option), @@ -23,7 +23,7 @@ pub enum IOError { #[derive(Debug, Error)] pub enum CalculateError { - #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception:{}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] + #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] BadJSONErr(PathBuf, Option), #[error("{}", .0)] CalculateIOError(IOError), @@ -49,7 +49,7 @@ mod tests { IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"ReadErr: The file cannot be read. Filepath: dummy/path/file.json -Originating Exception:None"# +Originating Exception: None"# ), ( IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), @@ -65,7 +65,7 @@ Filepath: dummy/path/no_file/"# IOError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), r#"BadFileContentsErr: Check that the file exists and is readable. 
Filepath: dummy/path/filenotexist.json -Originating Exception:None"# +Originating Exception: None"# ), ( IOError::CommandErr(None), @@ -87,13 +87,13 @@ Originating Exception: None"# CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception:None"# +Originating Exception: None"# ), ( CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception:None"# +Originating Exception: None"# ), ( CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), From fe0b9e7ef5d2bf6eab960d210ffe820d6ea8e085 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:00:16 -0400 Subject: [PATCH 60/81] refactor to use references better --- performance/runner/src/calculate.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index e297aa96157..52ead4a34ff 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -81,11 +81,9 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Result, CalculateError> { - let result_files = fs::read_dir(results_directory) + fs::read_dir(results_directory) .or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) - .or_else(|e| Err(CalculateError::CalculateIOError(e)))?; - - result_files + .or_else(|e| Err(CalculateError::CalculateIOError(e)))? .into_iter() .map(|entry| { let ent: DirEntry = entry.or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e)))) @@ -94,7 +92,7 @@ fn measurements_from_files( Ok(ent.path()) }) .collect::, CalculateError>>()? - .into_iter() + .iter() .filter(|path| { path .extension() @@ -102,14 +100,14 @@ fn measurements_from_files( .map_or(false, |ext| ext.ends_with("json")) }) .map(|path| { - fs::read_to_string(&path) + fs::read_to_string(path) .or_else(|e| Err(IOError::BadFileContentsErr(path.clone(), Some(e)))) .or_else(|e| Err(CalculateError::CalculateIOError(e))) .and_then(|contents| { serde_json::from_str::(&contents) .or_else(|e| Err(CalculateError::BadJSONErr(path.clone(), Some(e)))) }) - .map(|m| (path, m)) + .map(|m| (path.clone(), m)) }) .collect() } From 02007b36199f5a73944a43665b28dfd8d491a71a Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:05:23 -0400 Subject: [PATCH 61/81] more reference handling --- performance/runner/src/measure.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 45ac0700163..f2cce2d3156 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -39,7 +39,7 @@ impl Metric<'_> { } // calls hyperfine via system command, and returns result of runs -pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { +pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { // to add a new metric to the test suite, simply define it in this list: // TODO read from some config file? let metrics: Vec = vec![ @@ -65,20 +65,19 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result>(); + .collect::)>>(); Ok(pairs) }) - .collect::>, IOError>>()? + .collect::)>>, IOError>>()? 
.concat() - .into_iter() + .iter() // run hyperfine on each pairing .map(|(path, project_name, metric)| { Command::new("hyperfine") - .current_dir(&path) + .current_dir(path) // warms filesystem caches by running the command first without counting it. // alternatively we could clear them before each run .arg("--warmup") @@ -88,7 +87,7 @@ pub fn measure(projects_directory: &PathBuf, dbt_branch: &str) -> Result Date: Wed, 4 Aug 2021 17:33:29 -0400 Subject: [PATCH 62/81] make the happy path use references and clone in the exception paths --- performance/runner/src/calculate.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 52ead4a34ff..40ed654b899 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -118,7 +118,7 @@ fn calculate_regressions( measurements: &[(PathBuf, Measurement)], ) -> Result, CalculateError> { let mut measurement_groups: Vec = measurements - .into_iter() + .iter() .map(|(p, m)| { p.file_name() .ok_or_else(|| IOError::MissingFilenameErr(p.to_path_buf())) @@ -144,11 +144,11 @@ fn calculate_regressions( let sorted_measurement_groups = measurement_groups; let calculations: Vec = sorted_measurement_groups - .into_iter() + .iter() .group_by(|x| x.run.clone()) .into_iter() .map(|(_, g)| { - let mut groups: Vec = g.collect(); + let mut groups: Vec<&MeasurementGroup> = g.collect(); groups.sort_by(|x, y| x.version.cmp(&y.version)); match groups.len() { @@ -162,7 +162,10 @@ fn calculate_regressions( Err(CalculateError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) } }, - i => Err(CalculateError::BadGroupSizeErr(i, groups)), + i => { + let gs: Vec = groups.into_iter().map(|x| x.clone()).collect(); + Err(CalculateError::BadGroupSizeErr(i, gs)) + }, } }) .collect::>, CalculateError>>()? From 44a8f6a3bfdf0278906a6c19893c140ae9bb7999 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:46:45 -0400 Subject: [PATCH 63/81] more reference improvements --- performance/runner/src/calculate.rs | 40 +++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 40ed654b899..2609e70a8fd 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -115,7 +115,7 @@ fn measurements_from_files( // given a list of filename-measurement pairs, detect any regressions by grouping // measurements together by filename fn calculate_regressions( - measurements: &[(PathBuf, Measurement)], + measurements: &[(&PathBuf, &Measurement)], ) -> Result, CalculateError> { let mut measurement_groups: Vec = measurements .iter() @@ -131,7 +131,7 @@ fn calculate_regressions( MeasurementGroup { version: parts[0].to_owned(), run: parts[1..].join("_"), - measurement: m.clone(), + measurement: (*m).clone(), } }) }) @@ -175,27 +175,23 @@ fn calculate_regressions( } pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { - measurements_from_files(Path::new(&results_directory)).and_then(|v| match v[..] { + measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process - [] => { + if v.len() <= 0 { Err(CalculateError::NoResultsErr(results_directory.clone())) - }, - - _ => { - // we expect two runs for each project-metric pairing: one for each branch, baseline - // and dev. An odd result count is unexpected. 
- if v.len() % 2 == 1 { - Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone())) - } else { - // otherwise, we can do our comparisons - let measurements = v - .into_iter() - // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` - .map(|(p, ms)| (p, ms.results[0].clone())) - .collect::>(); - - calculate_regressions(&measurements) - } - }, + // we expect two runs for each project-metric pairing: one for each branch, baseline + // and dev. An odd result count is unexpected. + } else if v.len() % 2 == 1 { + Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone())) + } else { + // otherwise, we can do our comparisons + let measurements = v + .iter() + // the way we're running these, the files will each contain exactly one measurement, hence `results[0]` + .map(|(p, ms)| (p, &ms.results[0])) + .collect::>(); + + calculate_regressions(&measurements[..]) + } }) } From 02839ec77916419f9f9f5b0204c1567efc8bd2bf Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:48:13 -0400 Subject: [PATCH 64/81] iter instead of into_iter --- performance/runner/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 1f9482cb7dd..e02a4f1ce67 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -89,8 +89,8 @@ fn main() { f.write_all(json_calcs.as_bytes()).expect("unable to write data"); // filter for regressions - let regressions: Vec = calculations - .into_iter() + let regressions: Vec<&Calculation> = calculations + .iter() .filter(|c| c.regression) .collect(); From 88acf0727ba18780aa7077688e7859733a732c8d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 4 Aug 2021 17:50:32 -0400 Subject: [PATCH 65/81] remove clone --- performance/runner/src/calculate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 2609e70a8fd..324f33f2762 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -145,7 +145,7 @@ fn calculate_regressions( let calculations: Vec = sorted_measurement_groups .iter() - .group_by(|x| x.run.clone()) + .group_by(|x| &x.run) .into_iter() .map(|(_, g)| { let mut groups: Vec<&MeasurementGroup> = g.collect(); From 2d3d1b030a895654d8d634f2b388159d2c557135 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 09:47:34 -0400 Subject: [PATCH 66/81] rename to dummy project --- .../dbt_project.yml | 0 .../models/path_0/node_0.sql | 0 .../models/path_0/node_0.yml | 0 .../models/path_0/node_1.sql | 0 .../models/path_0/node_1.yml | 0 .../models/path_0/node_2.sql | 0 .../models/path_0/node_2.yml | 0 .../dbt_project.yml | 0 .../models/path_0/node_0.sql | 0 .../models/path_0/node_0.yml | 0 .../models/path_0/node_1.sql | 0 .../models/path_0/node_1.yml | 0 .../models/path_0/node_2.sql | 0 .../models/path_0/node_2.yml | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/dbt_project.yml (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_0.sql (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_0.yml (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_1.sql (100%) rename performance/projects/{01_mini_project_dont_merge 
=> 01_dummy_project}/models/path_0/node_1.yml (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_2.sql (100%) rename performance/projects/{01_mini_project_dont_merge => 01_dummy_project}/models/path_0/node_2.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/dbt_project.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_0.sql (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_0.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_1.sql (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_1.yml (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_2.sql (100%) rename performance/projects/{02_mini_project_dont_merge => 02_dummy_project}/models/path_0/node_2.yml (100%) diff --git a/performance/projects/01_mini_project_dont_merge/dbt_project.yml b/performance/projects/01_dummy_project/dbt_project.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/dbt_project.yml rename to performance/projects/01_dummy_project/dbt_project.yml diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/01_dummy_project/models/path_0/node_0.sql similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_0.sql rename to performance/projects/01_dummy_project/models/path_0/node_0.sql diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/01_dummy_project/models/path_0/node_0.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_0.yml rename to performance/projects/01_dummy_project/models/path_0/node_0.yml diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/01_dummy_project/models/path_0/node_1.sql similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_1.sql rename to performance/projects/01_dummy_project/models/path_0/node_1.sql diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/01_dummy_project/models/path_0/node_1.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_1.yml rename to performance/projects/01_dummy_project/models/path_0/node_1.yml diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/01_dummy_project/models/path_0/node_2.sql similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_2.sql rename to performance/projects/01_dummy_project/models/path_0/node_2.sql diff --git a/performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/01_dummy_project/models/path_0/node_2.yml similarity index 100% rename from performance/projects/01_mini_project_dont_merge/models/path_0/node_2.yml rename to performance/projects/01_dummy_project/models/path_0/node_2.yml diff --git a/performance/projects/02_mini_project_dont_merge/dbt_project.yml b/performance/projects/02_dummy_project/dbt_project.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/dbt_project.yml rename to 
performance/projects/02_dummy_project/dbt_project.yml diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql b/performance/projects/02_dummy_project/models/path_0/node_0.sql similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_0.sql rename to performance/projects/02_dummy_project/models/path_0/node_0.sql diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml b/performance/projects/02_dummy_project/models/path_0/node_0.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_0.yml rename to performance/projects/02_dummy_project/models/path_0/node_0.yml diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql b/performance/projects/02_dummy_project/models/path_0/node_1.sql similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_1.sql rename to performance/projects/02_dummy_project/models/path_0/node_1.sql diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml b/performance/projects/02_dummy_project/models/path_0/node_1.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_1.yml rename to performance/projects/02_dummy_project/models/path_0/node_1.yml diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql b/performance/projects/02_dummy_project/models/path_0/node_2.sql similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_2.sql rename to performance/projects/02_dummy_project/models/path_0/node_2.sql diff --git a/performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml b/performance/projects/02_dummy_project/models/path_0/node_2.yml similarity index 100% rename from performance/projects/02_mini_project_dont_merge/models/path_0/node_2.yml rename to performance/projects/02_dummy_project/models/path_0/node_2.yml From 6c8de62b24bc4df5478394d7fd44a6225d934699 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 09:48:33 -0400 Subject: [PATCH 67/81] up stddev threshold --- performance/runner/src/calculate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 324f33f2762..a47db8a91bd 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -49,7 +49,7 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Date: Thu, 5 Aug 2021 11:03:13 -0400 Subject: [PATCH 68/81] add lots of comments --- performance/runner/src/calculate.rs | 36 +++++++++++++++++++++--- performance/runner/src/exceptions.rs | 11 +++++++- performance/runner/src/main.rs | 42 +++++++++++++++++----------- performance/runner/src/measure.rs | 40 ++++++++++---------------- 4 files changed, 83 insertions(+), 46 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index a47db8a91bd..c5fd0e2eb90 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -5,6 +5,10 @@ use std::path::{Path, PathBuf}; use std::fs; use std::fs::DirEntry; + +// This type exactly matches the type of array elements +// from hyperfine's output. Deriving `Serialize` and `Deserialize` +// gives us read and write capabilities via json_serde. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurement { pub command: String, @@ -18,11 +22,18 @@ pub struct Measurement { pub times: Vec, } + +// This type exactly matches the type of hyperfine's output. +// Deriving `Serialize` and `Deserialize` gives us read and +// write capabilities via json_serde. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Measurements { pub results: Vec, } + +// Output data from a comparison between runs on the baseline +// and dev branches. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Data { pub threshold: f64, @@ -31,6 +42,8 @@ pub struct Data { pub dev: f64, } +// The full output from a comparison between runs on the baseline +// and dev branches. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Calculation { pub metric: String, @@ -38,6 +51,8 @@ pub struct Calculation { pub data: Data, } +// A type to describe which measurement we are working with. This +// information is parsed from the filename of hyperfine's output. #[derive(Debug, Clone, PartialEq)] pub struct MeasurementGroup { pub version: String, @@ -45,6 +60,8 @@ pub struct MeasurementGroup { pub measurement: Measurement, } +// Given two measurements, return all the calculations. Calculations are +// flagged as regressions or not regressions. fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec { let median_threshold = 1.05; // 5% regression threshold let median_difference = dev.median / baseline.median; @@ -76,8 +93,8 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Result, CalculateError> { @@ -112,11 +129,19 @@ fn measurements_from_files( .collect() } -// given a list of filename-measurement pairs, detect any regressions by grouping -// measurements together by filename +// Given a list of filename-measurement pairs, detect any regressions by grouping +// measurements together by filename. fn calculate_regressions( measurements: &[(&PathBuf, &Measurement)], ) -> Result, CalculateError> { + /* + Strategy of this function body: + 1. [Measurement] -> [MeasurementGroup] + 2. Sort the MeasurementGroups + 3. Group the MeasurementGroups by "run" + 4. Call `calculate` with the two resulting Measurements as input + */ + let mut measurement_groups: Vec = measurements .iter() .map(|(p, m)| { @@ -174,6 +199,9 @@ fn calculate_regressions( Ok(calculations) } +// Top-level function. Given a path for the result directory, call the above +// functions to compare and collect calculations. Calculations include both +// metrics that fall within the threshold and regressions. pub fn regressions(results_directory: &PathBuf) -> Result, CalculateError> { measurements_from_files(Path::new(&results_directory)).and_then(|v| { // exit early with an Err if there are no results to process diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 59062c3e3be..73a821d7408 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -6,6 +6,9 @@ use std::path::Path; use thiserror::Error; +// Custom IO Error messages for the IO errors we encounter. +// New constructors should be added to wrap any new IO errors. +// The desired output of these errors is tested below. 
#[derive(Debug, Error)] pub enum IOError { #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] @@ -20,7 +23,10 @@ pub enum IOError { CommandErr(Option), } - +// Custom Error messages for the error states we could encounter +// during calculation that are not prevented at compile time. New +// constructors should be added for any new error situations that +// come up. The desired output of these errors is tested below. #[derive(Debug, Error)] pub enum CalculateError { #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))] @@ -38,10 +44,12 @@ pub enum CalculateError { } +// Tests for exceptions #[cfg(test)] mod tests { use super::*; + // Tests the output of io error messages. There should be at least one per enum constructor. #[test] fn test_io_error_messages() { let pairs = vec![ @@ -80,6 +88,7 @@ Originating Exception: None"# } + // Tests the output of calculate error messages. There should be at least one per enum constructor. #[test] fn test_calculate_error_messages() { let pairs = vec![ diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index e02a4f1ce67..bd12e060765 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -12,6 +12,9 @@ use std::path::PathBuf; use std::process::exit; use structopt::StructOpt; + +// This type defines the commandline interface and is generated +// by `derive(StructOpt)` #[derive(Clone, Debug, StructOpt)] #[structopt(name = "performance", about = "performance regression testing runner")] enum Opt { @@ -31,27 +34,29 @@ enum Opt { }, } -fn gracefully_exit_or(f: &dyn Fn(A) -> B, r: Result) -> B { - match r { - Err(e) => { - println!("{}", e); - exit(1) - }, - Ok(x) => f(x), - } +// Given a result with a displayable error, map the value if there is one, or +// print the human readable error message and exit with exit code 1. +fn map_or_gracefully_exit(f: &dyn Fn(A) -> B, r: Result) -> B { + r.map_or_else(|e| { println!("{}", e); exit(1) }, f) } // This is where all the printing, exiting, and error displaying // should happen. Module functions should only return values. fn main() { + // match what the user inputs from the cli match Opt::from_args() { + // measure subcommand Opt::Measure { projects_dir, branch_name } => { - let statuses = gracefully_exit_or( + // get all the statuses of the hyperfine runs or + // gracefully show the user an exception + let statuses = map_or_gracefully_exit( &|x| x, measure::measure(&projects_dir, &branch_name) ); + // only exit with exit code 0 if all hyperfine runs were + // dispatched correctly. for status in statuses { match status.code() { None => (), @@ -65,28 +70,31 @@ fn main() { } }, + // calculate subcommand Opt::Calculate { results_dir } => { - let calculations = gracefully_exit_or( + // get all the calculations or gracefully show the user an exception + let calculations = map_or_gracefully_exit( &|x| x, calculate::regressions(&results_dir) ); - // print calculations to stdout + // print all calculations to stdout so they can be easily debugged + // via CI.
println!(":: All Calculations ::\n"); for c in &calculations { println!("{:#?}\n", c); } println!(""); - // write calculations to file so it can be downloaded as an artifact + // indented json string representation of the calculations array let json_calcs = serde_json::to_string_pretty(&calculations) - .expect("failed to serialize calculations to json"); + .expect("Main: Failed to serialize calculations to json"); + // create the empty destination file, and write the json string let outfile = &mut results_dir.into_os_string(); outfile.push("/final_calculations.json"); - - let mut f = File::create(outfile).expect("Unable to create file"); - f.write_all(json_calcs.as_bytes()).expect("unable to write data"); + let mut f = File::create(outfile).expect("Main: Unable to create file"); + f.write_all(json_calcs.as_bytes()).expect("Main: Unable to write data"); // filter for regressions let regressions: Vec<&Calculation> = calculations @@ -98,6 +106,8 @@ fn main() { match regressions[..] { [] => println!("congrats! no regressions :)"), _ => { + // print all calculations to stdout so they can be easily debugged + // via CI. println!(":: Regressions Found ::\n"); println!(""); for r in regressions { diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index f2cce2d3156..94b68d2581d 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,30 +1,11 @@ -extern crate structopt; - use crate::exceptions::IOError; use std::path::PathBuf; use std::process::{Command, ExitStatus}; use std::fs; -use structopt::StructOpt; -#[derive(Clone, Debug, StructOpt)] -#[structopt(name = "performance", about = "performance regression testing runner")] -enum Performance { - #[structopt(name = "measure")] - Measure { - #[structopt(parse(from_os_str))] - #[structopt(short)] - projects_dir: PathBuf, - #[structopt(short)] - branch_name: bool, - }, - #[structopt(name = "compare")] - Compare { - #[structopt(parse(from_os_str))] - #[structopt(short)] - results_dir: PathBuf, - }, -} +// `Metric` defines a dbt command that we want to measure on both the +// baseline and dev branches. #[derive(Debug, Clone)] struct Metric<'a> { name: &'a str, @@ -33,15 +14,23 @@ struct Metric<'a> { } impl Metric<'_> { + // Returns the proper filename for the hyperfine output for this metric. fn outfile(&self, project: &str, branch: &str) -> String { [branch, "_", self.name, "_", project, ".json"].join("") } } -// calls hyperfine via system command, and returns result of runs +// Calls hyperfine via system command, and returns all the exit codes for each hyperfine run. pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { - // to add a new metric to the test suite, simply define it in this list: - // TODO read from some config file? + /* + Strategy of this function body: + 1. Read all directory names in `projects_directory` + 2. Pair `n` projects with `m` metrics for a total of n*m pairs + 3. Run hyperfine on each project-metric pair + */ + + // To add a new metric to the test suite, simply define it in this list: + // TODO: This could be read from a config file in a future version. 
let metrics: Vec = vec![ Metric { name: "parse", @@ -89,7 +78,8 @@ pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result Date: Thu, 5 Aug 2021 11:10:07 -0400 Subject: [PATCH 69/81] fmt --- performance/runner/src/calculate.rs | 35 +++++++------- performance/runner/src/exceptions.rs | 71 +++++++++++++++------------- performance/runner/src/main.rs | 46 +++++++++--------- performance/runner/src/measure.rs | 42 ++++++++-------- 4 files changed, 101 insertions(+), 93 deletions(-) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index c5fd0e2eb90..055cb11be76 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -1,10 +1,9 @@ use crate::exceptions::{CalculateError, IOError}; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::path::{Path, PathBuf}; use std::fs; use std::fs::DirEntry; - +use std::path::{Path, PathBuf}; // This type exactly matches the type of array elements // from hyperfine's output. Deriving `Serialize` and `Deserialize` @@ -22,8 +21,7 @@ pub struct Measurement { pub times: Vec, } - -// This type exactly matches the type of hyperfine's output. +// This type exactly matches the type of hyperfine's output. // Deriving `Serialize` and `Deserialize` gives us read and // write capabilities via json_serde. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -31,7 +29,6 @@ pub struct Measurements { pub results: Vec, } - // Output data from a comparison between runs on the baseline // and dev branches. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -78,7 +75,7 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec Vec, CalculateError>>()? .iter() .filter(|path| { - path - .extension() + path.extension() .and_then(|ext| ext.to_str()) .map_or(false, |ext| ext.ends_with("json")) }) @@ -134,7 +131,7 @@ fn measurements_from_files( fn calculate_regressions( measurements: &[(&PathBuf, &Measurement)], ) -> Result, CalculateError> { - /* + /* Strategy of this function body: 1. [Measurement] -> [MeasurementGroup] 2. Sort the MeasurementGroups @@ -180,17 +177,20 @@ fn calculate_regressions( 2 => { let dev = &groups[1]; let baseline = &groups[0]; - + if dev.version == "dev" && baseline.version == "baseline" { Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement)) } else { - Err(CalculateError::BadBranchNameErr(baseline.version.clone(), dev.version.clone())) + Err(CalculateError::BadBranchNameErr( + baseline.version.clone(), + dev.version.clone(), + )) } - }, + } i => { let gs: Vec = groups.into_iter().map(|x| x.clone()).collect(); Err(CalculateError::BadGroupSizeErr(i, gs)) - }, + } } }) .collect::>, CalculateError>>()? @@ -210,7 +210,10 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Calc // we expect two runs for each project-metric pairing: one for each branch, baseline // and dev. An odd result count is unexpected. 
} else if v.len() % 2 == 1 { - Err(CalculateError::OddResultsCountErr(v.len(), results_directory.clone())) + Err(CalculateError::OddResultsCountErr( + v.len(), + results_directory.clone(), + )) } else { // otherwise, we can do our comparisons let measurements = v diff --git a/performance/runner/src/exceptions.rs b/performance/runner/src/exceptions.rs index 73a821d7408..f278da6c365 100644 --- a/performance/runner/src/exceptions.rs +++ b/performance/runner/src/exceptions.rs @@ -1,11 +1,10 @@ use crate::calculate::*; -use std::path::PathBuf; use std::io; #[cfg(test)] use std::path::Path; +use std::path::PathBuf; use thiserror::Error; - // Custom IO Error messages for the IO errors we encounter. // New constructors should be added to wrap any new IO errors. // The desired output of these errors is tested below. @@ -43,7 +42,6 @@ pub enum CalculateError { BadBranchNameErr(String, String), } - // Tests for exceptions #[cfg(test)] mod tests { @@ -57,28 +55,31 @@ mod tests { IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"ReadErr: The file cannot be read. Filepath: dummy/path/file.json -Originating Exception: None"# +Originating Exception: None"#, ), ( IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()), r#"MissingFilenameErr: The path provided does not specify a file. -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( IOError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()), r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file. -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( - IOError::BadFileContentsErr(Path::new("dummy/path/filenotexist.json").to_path_buf(), None), + IOError::BadFileContentsErr( + Path::new("dummy/path/filenotexist.json").to_path_buf(), + None, + ), r#"BadFileContentsErr: Check that the file exists and is readable. Filepath: dummy/path/filenotexist.json -Originating Exception: None"# +Originating Exception: None"#, ), ( IOError::CommandErr(None), r#"CommandErr: System command failed to run. -Originating Exception: None"# +Originating Exception: None"#, ), ]; @@ -87,7 +88,6 @@ Originating Exception: None"# } } - // Tests the output fo calculate error messages. There should be at least one per enum constructor. #[test] fn test_calculate_error_messages() { @@ -96,49 +96,55 @@ Originating Exception: None"# CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception: None"# +Originating Exception: None"#, ), ( CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None), r#"BadJSONErr: JSON in file cannot be deserialized as expected. Filepath: dummy/path/file.json -Originating Exception: None"# +Originating Exception: None"#, ), ( CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()), r#"NoResultsErr: The results directory has no json files in it. -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( - CalculateError::OddResultsCountErr(3, Path::new("dummy/path/no_file/").to_path_buf()), + CalculateError::OddResultsCountErr( + 3, + Path::new("dummy/path/no_file/").to_path_buf(), + ), r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number. 
File Count: 3 -Filepath: dummy/path/no_file/"# +Filepath: dummy/path/no_file/"#, ), ( - CalculateError::BadGroupSizeErr(1, vec![MeasurementGroup { - version: "dev".to_owned(), - run: "some command".to_owned(), - measurement: Measurement { - command: "some command".to_owned(), - mean: 1.0, - stddev: 1.0, - median: 1.0, - user: 1.0, - system: 1.0, - min: 1.0, - max: 1.0, - times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], - } - }]), + CalculateError::BadGroupSizeErr( + 1, + vec![MeasurementGroup { + version: "dev".to_owned(), + run: "some command".to_owned(), + measurement: Measurement { + command: "some command".to_owned(), + mean: 1.0, + stddev: 1.0, + median: 1.0, + user: 1.0, + system: 1.0, + min: 1.0, + max: 1.0, + times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1], + }, + }], + ), r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair. Count: 1 -Group: [("dev", "some command")]"# +Group: [("dev", "some command")]"#, ), ( CalculateError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()), r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'. -Found: boop, noop"# +Found: boop, noop"#, ), ]; @@ -146,5 +152,4 @@ Found: boop, noop"# assert_eq!(format!("{}", err), msg) } } - } diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index bd12e060765..eb3ad01f0a1 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -12,7 +12,6 @@ use std::path::PathBuf; use std::process::exit; use structopt::StructOpt; - // This type defines the commandline interface and is generated // by `derive(StructOpt)` #[derive(Clone, Debug, StructOpt)] @@ -37,23 +36,29 @@ enum Opt { // Given a result with a displayable error, map the value if there is one, or // print the human readable error message and exit with exit code 1. fn map_or_gracefully_exit(f: &dyn Fn(A) -> B, r: Result) -> B { - r.map_or_else(|e| { println!("{}", e); exit(1) }, f) + r.map_or_else( + |e| { + println!("{}", e); + exit(1) + }, + f, + ) } -// This is where all the printing, exiting, and error displaying +// This is where all the printing, exiting, and error displaying // should happen. Module functions should only return values. fn main() { // match what the user inputs from the cli match Opt::from_args() { - // measure subcommand - Opt::Measure { projects_dir, branch_name } => { + Opt::Measure { + projects_dir, + branch_name, + } => { // get all the statuses of the hyperfine runs or // gracefully show the user an exception - let statuses = map_or_gracefully_exit( - &|x| x, - measure::measure(&projects_dir, &branch_name) - ); + let statuses = + map_or_gracefully_exit(&|x| x, measure::measure(&projects_dir, &branch_name)); // only exit with exit code 0 if all hyperfine runs were // dispatched correctly. @@ -65,18 +70,15 @@ fn main() { Some(nonzero) => { println!("a child process exited with a nonzero status code."); exit(nonzero) - }, + } } } - }, + } // calculate subcommand Opt::Calculate { results_dir } => { // get all the calculations or gracefully show the user an exception - let calculations = map_or_gracefully_exit( - &|x| x, - calculate::regressions(&results_dir) - ); + let calculations = map_or_gracefully_exit(&|x| x, calculate::regressions(&results_dir)); // print all calculations to stdout so they can be easily debugged // via CI. 
@@ -89,18 +91,17 @@ fn main() { // indented json string representation of the calculations array let json_calcs = serde_json::to_string_pretty(&calculations) .expect("Main: Failed to serialize calculations to json"); - + // create the empty destination file, and write the json string let outfile = &mut results_dir.into_os_string(); outfile.push("/final_calculations.json"); let mut f = File::create(outfile).expect("Main: Unable to create file"); - f.write_all(json_calcs.as_bytes()).expect("Main: Unable to write data"); + f.write_all(json_calcs.as_bytes()) + .expect("Main: Unable to write data"); // filter for regressions - let regressions: Vec<&Calculation> = calculations - .iter() - .filter(|c| c.regression) - .collect(); + let regressions: Vec<&Calculation> = + calculations.iter().filter(|c| c.regression).collect(); // exit with non zero exit code if there are regressions match regressions[..] { @@ -117,7 +118,6 @@ fn main() { exit(1) } } - }, - + } } } diff --git a/performance/runner/src/measure.rs b/performance/runner/src/measure.rs index 94b68d2581d..35df2b997c7 100644 --- a/performance/runner/src/measure.rs +++ b/performance/runner/src/measure.rs @@ -1,8 +1,7 @@ use crate::exceptions::IOError; +use std::fs; use std::path::PathBuf; use std::process::{Command, ExitStatus}; -use std::fs; - // `Metric` defines a dbt command that we want to measure on both the // baseline and dev branches. @@ -21,24 +20,25 @@ impl Metric<'_> { } // Calls hyperfine via system command, and returns all the exit codes for each hyperfine run. -pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result, IOError> { - /* +pub fn measure<'a>( + projects_directory: &PathBuf, + dbt_branch: &str, +) -> Result, IOError> { + /* Strategy of this function body: 1. Read all directory names in `projects_directory` 2. Pair `n` projects with `m` metrics for a total of n*m pairs 3. Run hyperfine on each project-metric pair */ - + // To add a new metric to the test suite, simply define it in this list: // TODO: This could be read from a config file in a future version. - let metrics: Vec = vec![ - Metric { - name: "parse", - prepare: "rm -rf target/", - cmd: "dbt parse --no-version-check", - }, - ]; - + let metrics: Vec = vec![Metric { + name: "parse", + prepare: "rm -rf target/", + cmd: "dbt parse --no-version-check", + }]; + fs::read_dir(projects_directory) .or_else(|e| Err(IOError::ReadErr(projects_directory.to_path_buf(), Some(e))))? 
.map(|entry| { @@ -49,9 +49,12 @@ pub fn measure<'a>(projects_directory: &PathBuf, dbt_branch: &str) -> Result(projects_directory: &PathBuf, dbt_branch: &str) -> Result Date: Thu, 5 Aug 2021 11:14:30 -0400 Subject: [PATCH 70/81] add fmt check --- .github/workflows/performance.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 94d111f5b9e..85a5db34a24 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -15,6 +15,23 @@ on: jobs: + # checks fmt of runner code + fmt: + name: Cargo fmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + # runs any tests associated with the runner # these tests make sure the runner logic is correct test-runner: From 5f9ed1a83c4d236c6f66ed22d498072b27639122 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:15:40 -0400 Subject: [PATCH 71/81] run twice a day --- .github/workflows/performance.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 85a5db34a24..79a1efc9980 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -8,8 +8,8 @@ on: - 'develop' - 'performance-regression-testing' schedule: - # runs every day at 10:05pm - - cron: '5 22 * * *' + # runs twice a day at 10:05am and 10:05pm + - cron: '5 10,22 * * *' # Allows you to run this workflow manually from the Actions tab workflow_dispatch: From 3e1e171c6645b9c4ee3926a6fb00f90b98f19b5f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:31:00 -0400 Subject: [PATCH 72/81] add test for regression calculation --- performance/runner/src/calculate.rs | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/performance/runner/src/calculate.rs b/performance/runner/src/calculate.rs index 055cb11be76..bd22eacf03b 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -226,3 +226,47 @@ pub fn regressions(results_directory: &PathBuf) -> Result, Calc } }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detects_5_percent_regression() { + let dev = Measurement { + command: "some command".to_owned(), + mean: 1.06, + stddev: 1.06, + median: 1.06, + user: 1.06, + system: 1.06, + min: 1.06, + max: 1.06, + times: vec![], + }; + + let baseline = Measurement { + command: "some command".to_owned(), + mean: 1.00, + stddev: 1.00, + median: 1.00, + user: 1.00, + system: 1.00, + min: 1.00, + max: 1.00, + times: vec![], + }; + + let calculations = calculate("test_metric", &dev, &baseline); + let regressions: Vec<&Calculation> = calculations + .iter() + .filter(|calc| calc.regression) + .collect(); + + // expect one regression for median + println!("{:#?}", regressions); + assert_eq!(regressions.len(), 1); + assert_eq!(regressions[0].metric, "median_test_metric"); + } + +} \ No newline at end of file From 2068dd5510c7c2f545c3edc5d5baa1d6d2c9b74e Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:31:19 -0400 Subject: [PATCH 73/81] fmt --- performance/runner/src/calculate.rs | 41 +++++++++++++---------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/performance/runner/src/calculate.rs 
b/performance/runner/src/calculate.rs index bd22eacf03b..aff405d109b 100644 --- a/performance/runner/src/calculate.rs +++ b/performance/runner/src/calculate.rs @@ -235,38 +235,35 @@ mod tests { fn detects_5_percent_regression() { let dev = Measurement { command: "some command".to_owned(), - mean: 1.06, - stddev: 1.06, - median: 1.06, - user: 1.06, - system: 1.06, - min: 1.06, - max: 1.06, - times: vec![], + mean: 1.06, + stddev: 1.06, + median: 1.06, + user: 1.06, + system: 1.06, + min: 1.06, + max: 1.06, + times: vec![], }; let baseline = Measurement { command: "some command".to_owned(), - mean: 1.00, - stddev: 1.00, - median: 1.00, - user: 1.00, - system: 1.00, - min: 1.00, - max: 1.00, - times: vec![], + mean: 1.00, + stddev: 1.00, + median: 1.00, + user: 1.00, + system: 1.00, + min: 1.00, + max: 1.00, + times: vec![], }; let calculations = calculate("test_metric", &dev, &baseline); - let regressions: Vec<&Calculation> = calculations - .iter() - .filter(|calc| calc.regression) - .collect(); + let regressions: Vec<&Calculation> = + calculations.iter().filter(|calc| calc.regression).collect(); // expect one regression for median println!("{:#?}", regressions); assert_eq!(regressions.len(), 1); assert_eq!(regressions[0].metric, "median_test_metric"); } - -} \ No newline at end of file +} From ef63319733ce4472969ba6a24004d7136fdb184c Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:39:19 -0400 Subject: [PATCH 74/81] add comment --- .github/workflows/performance.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 79a1efc9980..0c664011c87 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -16,6 +16,8 @@ on: jobs: # checks fmt of runner code + # purposefully not a dependency of any other job + # will block merging, but not prevent developing fmt: name: Cargo fmt runs-on: ubuntu-latest From 87072707edab989b8d8ca0d35ff3f834a58684c6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 11:40:48 -0400 Subject: [PATCH 75/81] point to manifest --- .github/workflows/performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 0c664011c87..7b72d85797e 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -32,7 +32,7 @@ jobs: - uses: actions-rs/cargo@v1 with: command: fmt - args: --all -- --check + args: --manifest-path performance/runner/Cargo.toml --all -- --check # runs any tests associated with the runner # these tests make sure the runner logic is correct From 06cc0c57e815b0027ab27f5d667a833b75f0aa90 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Thu, 5 Aug 2021 12:27:21 -0400 Subject: [PATCH 76/81] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7159b16983..109aea86258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Handle whitespace after a plus sign on the project config ([#3526](https://github.com/dbt-labs/dbt/pull/3526)) ### Under the hood +- Add performance regression testing [#3602](https://github.com/dbt-labs/dbt/pull/3602) - Improve default view and table materialization performance by checking relational cache before attempting to drop temp relations ([#3112](https://github.com/fishtown-analytics/dbt/issues/3112), [#3468](https://github.com/fishtown-analytics/dbt/pull/3468)) - Add optional `sslcert`, `sslkey`, and 
`sslrootcert` profile arguments to the Postgres connector. ([#3472](https://github.com/fishtown-analytics/dbt/pull/3472), [#3473](https://github.com/fishtown-analytics/dbt/pull/3473)) - Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536)) From b90b3a9c196c812325584b525a4922a05ef7afe8 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Fri, 6 Aug 2021 14:46:10 -0400 Subject: [PATCH 77/81] avoid abandoned destructors by refactoring usage of exit --- performance/runner/src/main.rs | 81 +++++++++++++++++----------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index eb3ad01f0a1..856f6b01682 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -5,11 +5,10 @@ mod exceptions; mod measure; use crate::calculate::Calculation; -use std::fmt::Display; +use crate::exceptions::CalculateError; use std::fs::File; use std::io::Write; use std::path::PathBuf; -use std::process::exit; use structopt::StructOpt; // This type defines the commandline interface and is generated @@ -33,21 +32,12 @@ enum Opt { }, } -// Given a result with a displayable error, map the value if there is one, or -// print the human readable error message and exit with exit code 1. -fn map_or_gracefully_exit(f: &dyn Fn(A) -> B, r: Result) -> B { - r.map_or_else( - |e| { - println!("{}", e); - exit(1) - }, - f, - ) -} - -// This is where all the printing, exiting, and error displaying -// should happen. Module functions should only return values. -fn main() { +// enables proper usage of exit() in main. +// https://doc.rust-lang.org/std/process/fn.exit.html#examples +// +// This is where all the printing should happen. Exiting happens +// in main, and module functions should only return values. +fn run_app() -> Result { // match what the user inputs from the cli match Opt::from_args() { // measure subcommand @@ -55,30 +45,26 @@ fn main() { projects_dir, branch_name, } => { - // get all the statuses of the hyperfine runs or - // gracefully show the user an exception - let statuses = - map_or_gracefully_exit(&|x| x, measure::measure(&projects_dir, &branch_name)); - - // only exit with exit code 0 if all hyperfine runs were - // dispatched correctly. - for status in statuses { - match status.code() { - None => (), - Some(0) => (), - // if any child command exited with a non zero status code, exit with the same one here. - Some(nonzero) => { - println!("a child process exited with a nonzero status code."); - exit(nonzero) - } - } - } + // if there are any nonzero exit codes from the hyperfine runs, + // return the first one. otherwise return zero. + measure::measure(&projects_dir, &branch_name) + .or_else(|e| Err(CalculateError::CalculateIOError(e)))?
+ .iter() + .map(|status| status.code()) + .flatten() + .filter(|code| *code != 0) + .collect::>() + .get(0) + .map_or(Ok(0), |x| { + println!("Main: a child process exited with a nonzero status code."); + Ok(*x) + }) } // calculate subcommand Opt::Calculate { results_dir } => { // get all the calculations or gracefully show the user an exception - let calculations = map_or_gracefully_exit(&|x| x, calculate::regressions(&results_dir)); + let calculations = calculate::regressions(&results_dir)?; // print all calculations to stdout so they can be easily debugged // via CI. @@ -103,21 +89,34 @@ fn main() { let regressions: Vec<&Calculation> = calculations.iter().filter(|c| c.regression).collect(); - // exit with non zero exit code if there are regressions + // return a non-zero exit code if there are regressions match regressions[..] { - [] => println!("congrats! no regressions :)"), + [] => { + println!("congrats! no regressions :)"); + Ok(0) + } _ => { - // print all calculations to stdout so they can be easily debugged - // via CI. + // print all calculations to stdout so they can be easily + // debugged via CI. println!(":: Regressions Found ::\n"); println!(""); for r in regressions { println!("{:#?}\n", r); } println!(""); - exit(1) + Ok(1) } } } } } + +fn main() { + std::process::exit(match run_app() { + Ok(code) => code, + Err(err) => { + eprintln!("{}", err); + 1 + } + }); +} From cd6894acf49063adbd1d82c12d585f5aba5f09ae Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:52:53 -0400 Subject: [PATCH 78/81] add to future work --- performance/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/performance/README.md b/performance/README.md index 341e9da4333..a2ae7ef7fe3 100644 --- a/performance/README.md +++ b/performance/README.md @@ -14,3 +14,4 @@ In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Githu - add more dbt commands to measure - possibly using the uploaded json artifacts to store these results so they can be graphed over time - reading new metrics from a file so no one has to edit rust source to add them to the suite +- instead of building the rust every time, we could publish pull down the latest deployed version. \ No newline at end of file From 355b0c496ea1264d886e4fd2b3927d8f8728dc56 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:53:07 -0400 Subject: [PATCH 79/81] remove unnecessary newline printing --- performance/runner/src/main.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/performance/runner/src/main.rs b/performance/runner/src/main.rs index 856f6b01682..28392d11c8d 100644 --- a/performance/runner/src/main.rs +++ b/performance/runner/src/main.rs @@ -72,7 +72,6 @@ fn run_app() -> Result { for c in &calculations { println!("{:#?}\n", c); } - println!(""); // indented json string representation of the calculations array let json_calcs = serde_json::to_string_pretty(&calculations) @@ -99,11 +98,9 @@ fn run_app() -> Result { // print all calculations to stdout so they can be easily // debugged via CI. 
println!(":: Regressions Found ::\n"); - println!(""); for r in regressions { println!("{:#?}\n", r); } - println!(""); Ok(1) } } From 8609c023832dfbd7a5cb104501b050c2ac4c3458 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:53:53 -0400 Subject: [PATCH 80/81] minor change --- performance/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/performance/README.md b/performance/README.md index a2ae7ef7fe3..83a706686f4 100644 --- a/performance/README.md +++ b/performance/README.md @@ -14,4 +14,4 @@ In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Githu - add more dbt commands to measure - possibly using the uploaded json artifacts to store these results so they can be graphed over time - reading new metrics from a file so no one has to edit rust source to add them to the suite -- instead of building the rust every time, we could publish pull down the latest deployed version. \ No newline at end of file +- instead of building the rust every time, we could publish and pull down the latest version. From 1fe53750faa8fd5d2eb10d59e7ec27a9f569aaa5 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 9 Aug 2021 12:55:42 -0400 Subject: [PATCH 81/81] add more future work --- performance/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/performance/README.md b/performance/README.md index 83a706686f4..71aba30b989 100644 --- a/performance/README.md +++ b/performance/README.md @@ -15,3 +15,4 @@ In `runner/src/measure.rs::measure` add a metric to the `metrics` Vec. The Githu - possibly using the uploaded json artifacts to store these results so they can be graphed over time - reading new metrics from a file so no one has to edit rust source to add them to the suite - instead of building the rust every time, we could publish and pull down the latest version. +- instead of manually setting the baseline version of dbt to test, pull down the latest stable version as the baseline.