From e6783b458920d64fd18a5062fe545970f76ae765 Mon Sep 17 00:00:00 2001 From: Luke Hsiao Date: Thu, 27 Oct 2022 07:41:22 -0700 Subject: [PATCH] perf: parallelize stat collection with rayon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By collecting individual user stats in parallel, we dramatically speed up the time it takes to get stats on repositories with large histories. ``` ❯ hyperfine 'git-stats-old HEAD' 'git-stats-new HEAD' Benchmark 1: git-stats-old HEAD Time (mean ± σ): 6.141 s ± 0.017 s [User: 5.652 s, System: 0.509 s] Range (min … max): 6.120 s … 6.168 s 10 runs Benchmark 2: git-stats-new HEAD Time (mean ± σ): 772.7 ms ± 57.5 ms [User: 8142.8 ms, System: 742.6 ms] Range (min … max): 723.5 ms … 859.2 ms 10 runs Summary 'git-stats-new HEAD' ran 7.95 ± 0.59 times faster than 'git-stats-old HEAD' ``` --- Cargo.lock | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + src/main.rs | 83 ++++++++++++++++++++-------------------- 3 files changed, 150 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a8f88e5..2ba4df1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "bitflags" version = "1.3.2" @@ -100,6 +106,55 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + 
"cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f916dfc5d356b0ed9dae65f1db9fc9770aa2851d2662b988ccf4fe3516e86348" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edbafec5fa1f196ca66527c1b12c2ec4745ca14b50f1ad8f9f6f720b55d11fac" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "either" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" + [[package]] name = "env_logger" version = "0.9.1" @@ -142,13 +197,14 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "git-stats" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "clap", "clap-verbosity-flag", "env_logger", "log", + "rayon", "tabled", "xshell", ] @@ -207,6 +263,25 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.15.0" @@ -272,6 +347,30 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "regex" version = "1.6.0" @@ -303,6 +402,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "strsim" version = "0.10.0" diff --git a/Cargo.toml b/Cargo.toml index 37003a5..8ff34b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,5 +18,6 @@ clap = { version = "4.0.18", features = ["wrap_help", "derive"] } clap-verbosity-flag = "2.0.0" env_logger = "0.9.1" log = "0.4.17" +rayon = "1.5.3" tabled = "0.10.0" xshell = "0.2.2" diff --git a/src/main.rs b/src/main.rs index bfbc89a..72bd978 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use anyhow::{bail, Result}; use clap::Parser; use clap_verbosity_flag::{Verbosity, WarnLevel}; use log::info; +use rayon::prelude::*; use tabled::{object::Columns, Alignment, Modify, Style, Table, Tabled}; use xshell::{cmd, Shell}; @@ -69,46 +70,48 @@ fn main() -> Result<()> { .collect::<_>(); if !shortlog.is_empty() { - let mut stats = vec![]; - - for (commits, author) in shortlog { - let raw_stats = cmd!( - sh, - "git log -F --author={author} --pretty=tformat: --numstat {rev_range}" - ) - .read()?; - info!( - "author: {}, commits: {}, raw_stats: {}", - author, commits, raw_stats - ); - let mut insertions = 0; - let mut deletions = 0; - let mut num_files = 0; - for line in raw_stats.lines() { - let 
mut chunks = line.split_whitespace(); - insertions += match chunks.next() { - // For binary files - Some("-") => 0, - Some(n) => usize::from_str(n)?, - None => bail!("Invalid shortlog line"), - }; - deletions += match chunks.next() { - // For binary files - Some("-") => 0, - Some(n) => usize::from_str(n)?, - None => bail!("Invalid shortlog line"), - }; - num_files += 1; - } - let stat = Stat { - author: author.to_string(), - commits, - insertions, - deletions, - num_files, - }; - stats.push(stat); - } + let stats: Vec<Stat> = shortlog + .par_iter() + .map(|(commits, author)| { + let sh = Shell::new()?; + let raw_stats = cmd!( + sh, + "git log -F --author={author} --pretty=tformat: --numstat {rev_range}" + ) + .read()?; + info!( + "author: {}, commits: {}, raw_stats: {}", + author, commits, raw_stats + ); + let mut insertions = 0; + let mut deletions = 0; + let mut num_files = 0; + for line in raw_stats.lines() { + let mut chunks = line.split_whitespace(); + insertions += match chunks.next() { + // For binary files + Some("-") => 0, + Some(n) => usize::from_str(n)?, + None => bail!("Invalid shortlog line"), + }; + deletions += match chunks.next() { + // For binary files + Some("-") => 0, + Some(n) => usize::from_str(n)?, + None => bail!("Invalid shortlog line"), + }; + num_files += 1; + } + Ok(Stat { + author: author.to_string(), + commits: *commits, + insertions, + deletions, + num_files, + }) + }) + .filter_map(|r| r.ok()) + .collect::<_>(); let mut table = Table::new(stats); table