From 9b331005829ca795c2c3add1b5e87185bb8dd819 Mon Sep 17 00:00:00 2001 From: Alex Brozych Date: Wed, 11 Jan 2023 09:22:30 +0000 Subject: [PATCH] feat: Adds an option to ignore old branches * feat: Adds a command line option to ignore all branches whose last commit date is below a certain threshold to enable speeding up the time taken to scan repositories. --- README.md | 3 +- argparsing.py | 7 +++++ features/fixtures.py | 17 +++++++++++ features/helper.py | 33 ++++++++++++++++++++ features/secret_detection.feature | 17 +++++++++++ features/validate_output.feature | 9 ++++++ main.py | 15 +++++++++ tasks.py | 51 ++++++++++++++++++++++--------- 8 files changed, 136 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index c5a7478..9d1eaf7 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,8 @@ options: --dont-store-secret Do not store the plaintext secret in the results --extra-context Output two lines before and after the secret for additional context. --no-stats Do not output stats summary - + --ignore-branches-older-than IGNORE_BRANCHES_OLDER_THAN + Ignore branches whose last commit date is before this value. Format is Pythons's expected ISO format e.g. 2020-01-01T00:00:00+00:00 github/gitlab/azuredevops: --org ORG Organisation name to target --pat PAT Personal Access Token for API access and cloning diff --git a/argparsing.py b/argparsing.py index 604661b..60e9083 100644 --- a/argparsing.py +++ b/argparsing.py @@ -123,6 +123,13 @@ def error(self, message): help="Do not output stats summary", ) +parser.add_argument( + "--ignore-branches-older-than", + type=str, + default=None, + help="Ignore branches whose last commit date is before this value. Format is Pythons's expected ISO format e.g. 
2020-01-01T00:00:00+00:00", +) + def parse_args(): args = parser.parse_args() diff --git a/features/fixtures.py b/features/fixtures.py index c4b3e1b..f30403b 100644 --- a/features/fixtures.py +++ b/features/fixtures.py @@ -71,6 +71,22 @@ def wantsLongSecret(context): ) +@fixture +def wantsFixedDateSecret(context): + safe_add_rules( + context, + [ + ["repo aws"], + [ + "file aws_key", + "aws_access_key_id = AKIAYVP4CIPPERUVIFXG\n", + "aws_secret_access_key = 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", + ], + ["commitdate 2020-01-01T00:00:00"], + ], + ) + + def branchTest(context): safe_add_rules( context, @@ -108,4 +124,5 @@ def branchTest(context): "wantsAWSSecret": wantsAWSSecret, "branchTest": branchTest, "wantsLongSecret": wantsLongSecret, + "wantsFixedDateSecret": wantsFixedDateSecret, } diff --git a/features/helper.py b/features/helper.py index 66c7ec7..43985e9 100644 --- a/features/helper.py +++ b/features/helper.py @@ -216,6 +216,30 @@ def step_impl(context, branch_toggle, extra_context, secret_toggle, format, engi run_secret_magpie(context, engines, outformat=format, args=args) +@when( + "we run secret-magpie-cli in {branch_toggle} branch mode, ignoring commits older than {threshold_date} extra context {extra_context}, secret storing {secret_toggle}, output format {format} and engines: {engines}" +) +def step_impl( + context, + branch_toggle, + threshold_date, + extra_context, + secret_toggle, + format, + engines, +): + args = [] + if threshold_date != "None": + args.append(f"--ignore-branches-older-than={threshold_date}") + if extra_context == "enabled": + args.append("--extra-context") + if secret_toggle == "disabled": + args.append("--dont-store-secret") + if branch_toggle == "single": + args.append("--single-branch") + run_secret_magpie(context, engines, outformat=format, args=args) + + @then("secret-magpie-cli's output will be") def step_impl(context): stdout = context.stdout @@ -307,6 +331,15 @@ def __init__(self, rules, dir): else: 
current_repo.index.commit("Commit.") + case "commitdate": + current_repo.git.add(A=True) + if not commit_all: + commit_all = True + if len(rule) > 1: + current_repo.index.commit("Commit.", commit_date=rule[1]) + else: + current_repo.index.commit("Commit.") + case "branch": # If we have content that isn't commit yet # We should commit it before anything else. diff --git a/features/secret_detection.feature b/features/secret_detection.feature index c7c8026..54334f8 100644 --- a/features/secret_detection.feature +++ b/features/secret_detection.feature @@ -60,3 +60,20 @@ Feature: Validate secret detection against various engines. Scenario: Ensure that we can detect secrets in AzureDevOps organisations When we run secret-magpie-cli with engines: all Then there will be 4 secrets detected + + @localrepos + @fixture.wantsFixedDateSecret + Scenario: Detect all secrets with fixed dates when we don't ignore secrets + When we run secret-magpie-cli in multi branch mode, ignoring commits older than None extra context disabled, secret storing enabled, output format csv and engines: all + Then there will be 2 secrets detected + + @localrepos + @fixture.wantsFixedDateSecret + Scenario Outline: Detect no secrets with fixed dates when we ignore secrets older than 2022-01-01T00:00:00+00:00 in <mode> branch mode. + When we run secret-magpie-cli in <mode> branch mode, ignoring commits older than 2022-01-01T00:00:00+00:00 extra context disabled, secret storing enabled, output format csv and engines: all + Then there will be 0 secrets detected + + Examples: + | mode | + | single | + | multi | diff --git a/features/validate_output.feature b/features/validate_output.feature index afe96e9..4fef391 100644 --- a/features/validate_output.feature +++ b/features/validate_output.feature @@ -49,3 +49,12 @@ Feature: Validate that the results files produced by secret-magpie-cli is of val Scenario: Ensure that the date field within the repo is parseable in ISO8601 format. 
When we run secret-magpie-cli with engines: all Then the date column of results.csv will be ISO8601 format + + @localrepos + @wantsAWSSecret + Scenario: Ensure that secret-magpie-cli gives the expected error when we run it with an invalid threshold date + When we run secret-magpie-cli in multi branch mode, ignoring commits older than invaliddate extra context disabled, secret storing enabled, output format csv and engines: all + Then secret-magpie-cli's output will be + """ + ERROR: Invalid ISO format string. + """ diff --git a/main.py b/main.py index 4bd42b9..cb15f6a 100644 --- a/main.py +++ b/main.py @@ -7,12 +7,26 @@ import argparsing import stats import output +import datetime +import time if __name__ == "__main__": print(argparsing.banner) args = argparsing.parse_args() cleanup = not (args.no_cleanup or "filesystem" == args.provider) + threshold_date = None + if args.ignore_branches_older_than != None: + try: + threshold_date = time.mktime( + datetime.datetime.fromisoformat( + args.ignore_branches_older_than + ).timetuple() + ) + except ValueError: + print("ERROR: Invalid ISO format string.") + sys.exit(1) + tool_list = [] if not args.disable_gitleaks: tool_list.append(tools.gitleaks) @@ -29,6 +43,7 @@ single_branch=args.single_branch, extra_context=args.extra_context, cleanup=cleanup, + threshold_date=threshold_date, ) pool = ThreadPool(args.parallel_repos) results = pool.imap_unordered(f, repos) diff --git a/tasks.py b/tasks.py index 41e6e13..110fa44 100644 --- a/tasks.py +++ b/tasks.py @@ -29,23 +29,38 @@ def onerror(func, path, exc_info): raise -def get_branches(path): +def get_branches(path, threshold_date=None, single_branch=False): r = GitRepo.init(path) branches = [] - if len(r.remotes) > 0: + if single_branch: + branches = ["HEAD"] + else: + if len(r.remotes) > 0: + branches.extend( + [ + "remotes/" + x.name + for x in r.remotes[0].refs + if x.is_detached == True + ] + ) + branches.extend( - ["remotes/" + x.name for x in r.remotes[0].refs if 
x.is_detached == True] + [ + head.name + for head in r.heads + if head.is_detached == True and not head.is_remote() + ] ) - branches.extend( - [ - head.name - for head in r.heads - if head.is_detached == True and not head.is_remote() - ] - ) + if threshold_date != None: + branches = list( + filter( + lambda branch: r.commit(branch).committed_date >= threshold_date, + branches, + ) + ) return branches @@ -67,17 +82,23 @@ def __repr__(self): def process_repo( - repo, functions, single_branch=False, extra_context=False, cleanup=True + repo, + functions, + single_branch=False, + extra_context=False, + cleanup=True, + threshold_date=None, ): out = [] try: path = repo.clone_repo() except: return [ProcessRepoResult(repo, "FAIL", "Could not clone")] - if not single_branch: - branches = get_branches(path) - else: - branches = ["HEAD"] + + branches = get_branches( + path, threshold_date=threshold_date, single_branch=single_branch + ) + for branch in branches: for function in functions: try: