Skip to content

Commit

Permalink
perf: parallelize stat collection with rayon
Browse files Browse the repository at this point in the history
By collecting individual user stats in parallel, we dramatically reduce
the time it takes to get stats on repositories with large histories.

```
❯ hyperfine 'git-stats-old HEAD' 'git-stats-new HEAD'
Benchmark 1: git-stats-old HEAD
  Time (mean ± σ):      6.141 s ±  0.017 s    [User: 5.652 s, System: 0.509 s]
  Range (min … max):    6.120 s …  6.168 s    10 runs

Benchmark 2: git-stats-new HEAD
  Time (mean ± σ):     772.7 ms ±  57.5 ms    [User: 8142.8 ms, System: 742.6 ms]
  Range (min … max):   723.5 ms … 859.2 ms    10 runs

Summary
  'git-stats-new HEAD' ran
    7.95 ± 0.59 times faster than 'git-stats-old HEAD'
```
  • Loading branch information
lukehsiao committed Oct 27, 2022
1 parent a7dd96a commit e6783b4
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 41 deletions.
107 changes: 106 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ clap = { version = "4.0.18", features = ["wrap_help", "derive"] }
clap-verbosity-flag = "2.0.0"
env_logger = "0.9.1"
log = "0.4.17"
rayon = "1.5.3"
tabled = "0.10.0"
xshell = "0.2.2"
83 changes: 43 additions & 40 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use anyhow::{bail, Result};
use clap::Parser;
use clap_verbosity_flag::{Verbosity, WarnLevel};
use log::info;
use rayon::prelude::*;
use tabled::{object::Columns, Alignment, Modify, Style, Table, Tabled};

use xshell::{cmd, Shell};
Expand Down Expand Up @@ -69,46 +70,48 @@ fn main() -> Result<()> {
.collect::<_>();

if !shortlog.is_empty() {
let mut stats = vec![];

for (commits, author) in shortlog {
let raw_stats = cmd!(
sh,
"git log -F --author={author} --pretty=tformat: --numstat {rev_range}"
)
.read()?;
info!(
"author: {}, commits: {}, raw_stats: {}",
author, commits, raw_stats
);
let mut insertions = 0;
let mut deletions = 0;
let mut num_files = 0;
for line in raw_stats.lines() {
let mut chunks = line.split_whitespace();
insertions += match chunks.next() {
// For binary files
Some("-") => 0,
Some(n) => usize::from_str(n)?,
None => bail!("Invalid shortlog line"),
};
deletions += match chunks.next() {
// For binary files
Some("-") => 0,
Some(n) => usize::from_str(n)?,
None => bail!("Invalid shortlog line"),
};
num_files += 1;
}
let stat = Stat {
author: author.to_string(),
commits,
insertions,
deletions,
num_files,
};
stats.push(stat);
}
let stats: Vec<Stat> = shortlog
.par_iter()
.map(|(commits, author)| {
let sh = Shell::new()?;
let raw_stats = cmd!(
sh,
"git log -F --author={author} --pretty=tformat: --numstat {rev_range}"
)
.read()?;
info!(
"author: {}, commits: {}, raw_stats: {}",
author, commits, raw_stats
);
let mut insertions = 0;
let mut deletions = 0;
let mut num_files = 0;
for line in raw_stats.lines() {
let mut chunks = line.split_whitespace();
insertions += match chunks.next() {
// For binary files
Some("-") => 0,
Some(n) => usize::from_str(n)?,
None => bail!("Invalid shortlog line"),
};
deletions += match chunks.next() {
// For binary files
Some("-") => 0,
Some(n) => usize::from_str(n)?,
None => bail!("Invalid shortlog line"),
};
num_files += 1;
}
Ok(Stat {
author: author.to_string(),
commits: *commits,
insertions,
deletions,
num_files,
})
})
.filter_map(|r| r.ok())
.collect::<_>();

let mut table = Table::new(stats);
table
Expand Down

0 comments on commit e6783b4

Please sign in to comment.