From a9d809c5fef27f21db2de3796350f7c4a4dab0e4 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Mon, 17 Jun 2024 15:49:58 -0400 Subject: [PATCH 1/2] Improve `summarize` - Add columns for status labels - Improve error handling of summary-producing operations (these should not ever fail under normal circumstances, so give a stack trace when they do) - Add more detailed command-level help to `summarize --help` --- crates/noseyparker-cli/src/args.rs | 18 ++++++ crates/noseyparker-cli/src/cmd_scan.rs | 9 ++- crates/noseyparker-cli/src/cmd_summarize.rs | 22 ++++++- ...oseyparker__datastore__export_empty-2.snap | 4 +- ...t_noseyparker__help__help_summarize-2.snap | 18 ++++++ ...ppmaker__scan_workflow_from_git_url-2.snap | 16 ++--- ...yparker__scan__basic__scan_secrets1-2.snap | 8 +-- ...ength__scan_changing_snippet_length-2.snap | 8 +-- ...ength__scan_changing_snippet_length-9.snap | 8 +-- crates/noseyparker/src/datastore.rs | 58 ++++++++++++++++++- .../src/datastore/finding_summary.rs | 25 +++++--- 11 files changed, 152 insertions(+), 42 deletions(-) diff --git a/crates/noseyparker-cli/src/args.rs b/crates/noseyparker-cli/src/args.rs index 4034b95c1..9c94186b2 100644 --- a/crates/noseyparker-cli/src/args.rs +++ b/crates/noseyparker-cli/src/args.rs @@ -224,6 +224,24 @@ pub enum Command { Scan(ScanArgs), /// Summarize scan findings + /// + /// Findings are summarized in tabular form. + /// The default `human` format prints a table of findings with one row for each rule that produced findings. + /// The table has several columns: + /// + /// - Rule: the name of the rule + /// + /// - Findings: the number of findings, i.e., the number of distinct match group values produced by the rule + /// + /// - Matches: the number of individual matches + /// + /// - Accepted: the number of findings whose matches have `accept` status + /// + /// - Rejected: the number of findings whose matches have `reject` status + /// + /// - Mixed: the number of findings whose matches have a mix of `accept` and `reject` status + /// + /// - Unlabeled: the number of findings whose matches have no status at all #[command(display_order = 2, alias = "summarise")] Summarize(SummarizeArgs), diff --git a/crates/noseyparker-cli/src/cmd_scan.rs b/crates/noseyparker-cli/src/cmd_scan.rs index 2127b987c..9765d1fc0 100644 --- a/crates/noseyparker-cli/src/cmd_scan.rs +++ b/crates/noseyparker-cli/src/cmd_scan.rs @@ -657,10 +657,13 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::ScanArgs) -> Result<()> } if num_matches > 0 { - let matches_summary = datastore.get_summary()?; - let matches_table = crate::cmd_summarize::summary_table(&matches_summary); + let summary = datastore + .get_summary() + .context("Failed to get finding summary") + .unwrap(); + let table = crate::cmd_summarize::summary_table(&summary); println!(); - matches_table.print_tty(global_args.use_color(std::io::stdout()))?; + table.print_tty(global_args.use_color(std::io::stdout()))?; } println!("\nRun the `report` command next to show finding details."); diff --git a/crates/noseyparker-cli/src/cmd_summarize.rs b/crates/noseyparker-cli/src/cmd_summarize.rs index e76a8aef2..02f698794 100644 --- a/crates/noseyparker-cli/src/cmd_summarize.rs +++ b/crates/noseyparker-cli/src/cmd_summarize.rs @@ -53,7 +53,11 @@ pub fn run(global_args: &GlobalArgs, args: &SummarizeArgs) -> Result<()> { .output_args .get_writer() .context("Failed to get output writer")?; - FindingSummaryReporter(datastore.get_summary()?).report(args.output_args.format, output) + let summary = datastore + .get_summary() + .context("Failed to get finding summary") + .unwrap(); + FindingSummaryReporter(summary).report(args.output_args.format, output) } pub fn summary_table(summary: &FindingSummary) -> prettytable::Table { @@ -73,11 +77,23 @@ pub fn summary_table(summary: &FindingSummary) -> prettytable::Table { row![ l -> &e.rule_name, r -> HumanCount(e.distinct_count.try_into().unwrap()), - r -> HumanCount(e.total_count.try_into().unwrap()) + r -> HumanCount(e.total_count.try_into().unwrap()), + r -> HumanCount(e.accept_count.try_into().unwrap()), + r -> HumanCount(e.reject_count.try_into().unwrap()), + r -> HumanCount(e.mixed_count.try_into().unwrap()), + r -> HumanCount(e.unlabeled_count.try_into().unwrap()), ] }) .collect(); table.set_format(f); - table.set_titles(row![lb -> "Rule", cb -> "Total Findings", cb -> "Total Matches"]); + table.set_titles(row![ + lb -> "Rule", + cb -> "Findings", + cb -> "Matches", + cb -> "Accepted", + cb -> "Rejected", + cb -> "Mixed", + cb -> "Unlabeled", + ]); table } diff --git a/crates/noseyparker-cli/tests/datastore/snapshots/test_noseyparker__datastore__export_empty-2.snap b/crates/noseyparker-cli/tests/datastore/snapshots/test_noseyparker__datastore__export_empty-2.snap index e0497dfd8..82ffe52d0 100644 --- a/crates/noseyparker-cli/tests/datastore/snapshots/test_noseyparker__datastore__export_empty-2.snap +++ b/crates/noseyparker-cli/tests/datastore/snapshots/test_noseyparker__datastore__export_empty-2.snap @@ -2,5 +2,5 @@ source: crates/noseyparker-cli/tests/datastore/mod.rs expression: stdout --- - Rule Total Findings Total Matches -─────────────────────────────────────── + Rule Findings Matches Accepted Rejected Mixed Unlabeled +───────────────────────────────────────────────────────────────────── diff --git a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_summarize-2.snap b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_summarize-2.snap index 5bd05546c..3f466a5be 100644 --- a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_summarize-2.snap +++ b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_summarize-2.snap @@ -4,6 +4,24 @@ expression: stdout --- Summarize scan findings +Findings are summarized in tabular form. The default `human` format prints a table of findings with +one row for each rule that produced findings. The table has several columns: + +- Rule: the name of the rule + +- Findings: the number of findings, i.e., the number of distinct match group values produced by the +rule + +- Matches: the number of individual matches + +- Accepted: the number of findings whose matches have `accept` status + +- Rejected: the number of findings whose matches have `reject` status + +- Mixed: the number of findings whose matches have a mix of `accept` and `reject` status + +- Unlabeled: the number of findings whose matches have no status at all + Usage: noseyparker summarize [OPTIONS] Options: diff --git a/crates/noseyparker-cli/tests/scan/appmaker/snapshots/test_noseyparker__scan__appmaker__scan_workflow_from_git_url-2.snap b/crates/noseyparker-cli/tests/scan/appmaker/snapshots/test_noseyparker__scan__appmaker__scan_workflow_from_git_url-2.snap index 4f5c73f05..d138414f3 100644 --- a/crates/noseyparker-cli/tests/scan/appmaker/snapshots/test_noseyparker__scan__appmaker__scan_workflow_from_git_url-2.snap +++ b/crates/noseyparker-cli/tests/scan/appmaker/snapshots/test_noseyparker__scan__appmaker__scan_workflow_from_git_url-2.snap @@ -2,11 +2,11 @@ source: crates/noseyparker-cli/tests/scan/appmaker/mod.rs expression: stdout --- - Rule Total Findings Total Matches -───────────────────────────────────────────────────────────── - AWS API Key 3 3 - AWS S3 Bucket (path style) 3 13 - Amazon Resource Name 3 3 - Generic Secret 3 3 - AWS API Credentials 1 1 - AWS Secret Access Key 1 1 + Rule Findings Matches Accepted Rejected Mixed Unlabeled +─────────────────────────────────────────────────────────────────────────────────────────── + AWS API Credentials 1 1 0 0 0 1 + AWS API Key 3 3 0 0 0 3 + AWS S3 Bucket (path style) 3 13 0 0 0 3 + AWS Secret Access Key 1 1 0 0 0 1 + Amazon Resource Name 3 3 0 0 0 3 + Generic Secret 3 3 0 0 0 3 diff --git a/crates/noseyparker-cli/tests/scan/basic/snapshots/test_noseyparker__scan__basic__scan_secrets1-2.snap b/crates/noseyparker-cli/tests/scan/basic/snapshots/test_noseyparker__scan__basic__scan_secrets1-2.snap index 617bf14f4..968f77533 100644 --- a/crates/noseyparker-cli/tests/scan/basic/snapshots/test_noseyparker__scan__basic__scan_secrets1-2.snap +++ b/crates/noseyparker-cli/tests/scan/basic/snapshots/test_noseyparker__scan__basic__scan_secrets1-2.snap @@ -2,8 +2,6 @@ source: crates/noseyparker-cli/tests/scan/basic/mod.rs expression: stdout --- - - Rule Total Findings Total Matches -─────────────────────────────────────────────────────────────── - GitHub Personal Access Token 1 1 - + Rule Findings Matches Accepted Rejected Mixed Unlabeled +───────────────────────────────────────────────────────────────────────────────────────────── + GitHub Personal Access Token 1 1 0 0 0 1 diff --git a/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-2.snap b/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-2.snap index 4c2538862..691102593 100644 --- a/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-2.snap +++ b/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-2.snap @@ -2,8 +2,6 @@ source: crates/noseyparker-cli/tests/scan/snippet_length/mod.rs expression: stdout --- - - Rule Total Findings Total Matches -─────────────────────────────────────────────────────────────── - GitHub Personal Access Token 1 1 - + Rule Findings Matches Accepted Rejected Mixed Unlabeled +───────────────────────────────────────────────────────────────────────────────────────────── + GitHub Personal Access Token 1 1 0 0 0 1 diff --git a/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-9.snap b/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-9.snap index 4c2538862..691102593 100644 --- a/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-9.snap +++ b/crates/noseyparker-cli/tests/scan/snippet_length/snapshots/test_noseyparker__scan__snippet_length__scan_changing_snippet_length-9.snap @@ -2,8 +2,6 @@ source: crates/noseyparker-cli/tests/scan/snippet_length/mod.rs expression: stdout --- - - Rule Total Findings Total Matches -─────────────────────────────────────────────────────────────── - GitHub Personal Access Token 1 1 - + Rule Findings Matches Accepted Rejected Mixed Unlabeled +───────────────────────────────────────────────────────────────────────────────────────────── + GitHub Personal Access Token 1 1 0 0 0 1 diff --git a/crates/noseyparker/src/datastore.rs b/crates/noseyparker/src/datastore.rs index 6cd218b80..348bf5033 100644 --- a/crates/noseyparker/src/datastore.rs +++ b/crates/noseyparker/src/datastore.rs @@ -519,16 +519,68 @@ impl Datastore { pub fn get_summary(&self) -> Result { let _span = debug_span!("Datastore::get_summary", "{}", self.root_dir.display()).entered(); + // XXX this should be moved into a view in the datastore (probably should replace + // `finding_summary`), but it is inlined here instead to avoid a schema migration for now let mut stmt = self.conn.prepare_cached(indoc! {r#" - select rule_name, total_findings, total_matches - from finding_summary - order by total_findings desc, rule_name, total_matches desc + with + -- table of relevant per-match information + m as ( + select + f.finding_id finding_id, + r.name rule_name, + r.structural_id rule_structural_id, + ms.status match_status + from + finding f + inner join match m on (m.finding_id = f.id) + inner join rule r on (f.rule_id = r.id) + left outer join match_status ms on (m.id = ms.match_id) + ), + -- summarize per-match information by finding + f as ( + select + finding_id, + rule_name, + rule_structural_id, + case group_concat(distinct match_status) + when 'accept' then 'accept' + when 'reject' then 'reject' + when 'accept,reject' then 'mixed' + when 'reject,accept' then 'mixed' + end finding_status, + count(*) num_matches, + sum(case when match_status = 'accept' then 1 else 0 end) num_accept_matches, + sum(case when match_status = 'reject' then 1 else 0 end) num_reject_matches, + sum(case when match_status is null then 1 else 0 end) num_unlabeled_matches + from m + group by finding_id + ) + select + rule_name, + -- rule_structural_id, + count(distinct finding_id) total_findings, + sum(num_matches) total_matches, + sum(case when finding_status = 'accept' then 1 else 0 end) accept_findings, + sum(case when finding_status = 'reject' then 1 else 0 end) reject_findings, + sum(case when finding_status = 'mixed' then 1 else 0 end) mixed_findings, + sum(case when finding_status is null then 1 else 0 end) unlabeled_findings + -- , + -- sum(num_accept_matches) accept_matches, + -- sum(num_reject_matches) reject_matches, + -- sum(num_unlabeled_matches) unlabeled_matches + from + f + group by rule_name, rule_structural_id "#})?; let entries = stmt.query_map((), |row| { Ok(FindingSummaryEntry { rule_name: row.get(0)?, distinct_count: row.get(1)?, total_count: row.get(2)?, + accept_count: row.get(3)?, + reject_count: row.get(4)?, + mixed_count: row.get(5)?, + unlabeled_count: row.get(6)?, }) })?; let es = collect(entries)?; diff --git a/crates/noseyparker/src/datastore/finding_summary.rs b/crates/noseyparker/src/datastore/finding_summary.rs index 787f355af..157ca41a3 100644 --- a/crates/noseyparker/src/datastore/finding_summary.rs +++ b/crates/noseyparker/src/datastore/finding_summary.rs @@ -10,16 +10,25 @@ pub struct FindingSummary(pub Vec); #[derive(Serialize)] pub struct FindingSummaryEntry { + /// The rule name of this entry pub rule_name: String, + + /// The number of findings with this rule pub distinct_count: usize, + + /// The number of matches with this rule pub total_count: usize, -} -impl std::fmt::Display for FindingSummary { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for entry in self.0.iter() { - writeln!(f, "{}: {} ({})", entry.rule_name, entry.distinct_count, entry.total_count)?; - } - Ok(()) - } + /// The number of findings with this rule with the `accept` status + pub accept_count: usize, + + /// The number of findings with this rule with the `reject` status + pub reject_count: usize, + + /// The number of findings with this rule with a mixed status, i.e., both `reject` and `accept` + /// status + pub mixed_count: usize, + + /// The number of findings with this rule that have no assigned status + pub unlabeled_count: usize, } From a453bb27f253088b88fdb05b87bc1cbf38e01cdc Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Mon, 17 Jun 2024 15:53:37 -0400 Subject: [PATCH 2/2] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e52756265..3afbc1b44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), Having the debug symbols available makes stack traces more useful in the rare event of a crash. The Alpine-based Docker image does not include these debug symbols, as its point of existing is to provide a small distribution. +- The `summarize` command now includes additional columns for the assigned finding status ([#196](https://github.com/praetorian-inc/noseyparker/pull/196)). ### Changes