Skip to content

Commit

Permalink
feat: Adds an option to ignore old branches
Browse files Browse the repository at this point in the history
* feat: Adds a command line option to ignore all branches whose last commit date is before a given threshold, reducing the time taken to scan repositories.
  • Loading branch information
alexbrozych committed Jan 10, 2023
1 parent b7d8093 commit 367e8ac
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 3 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ options:
--dont-store-secret Do not store the plaintext secret in the results
--extra-context Output two lines before and after the secret for additional context.
--no-stats Do not output stats summary
--ignore-older-than IGNORE_OLDER_THAN
Ignore branches whose last commit date is before this value.
github/gitlab/azuredevops:
--org ORG Organisation name to target
Expand Down
7 changes: 7 additions & 0 deletions argparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ def error(self, message):
help="Do not output stats summary",
)

# Optional cut-off date, kept as a raw string here (default None means no
# filtering). main.py parses it with datetime.datetime.fromisoformat, so the
# value must be in ISO 8601 form, e.g. 2022-01-01T00:00:00+00:00.
parser.add_argument(
"--ignore-older-than",
type=str,
default=None,
help="Ignore branches whose last commit date is before this value.",
)


def parse_args():
args = parser.parse_args()
Expand Down
17 changes: 17 additions & 0 deletions features/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,22 @@ def wantsLongSecret(context):
)


@fixture
def wantsFixedDateSecret(context):
    """Add the rule set for an AWS key pair committed at a fixed date.

    Three rules are handed to safe_add_rules: a "repo aws" rule, a
    "file aws_key" rule carrying an access key id and a secret access key,
    and a "commitdate" rule pinned to 2020-01-01T00:00:00 so that scenarios
    filtering on commit age do not depend on when the tests are run.
    """
    repo_rule = ["repo aws"]
    key_file_rule = [
        "file aws_key",
        "aws_access_key_id = AKIAYVP4CIPPERUVIFXG\n",
        "aws_secret_access_key = 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",
    ]
    commit_rule = ["commitdate 2020-01-01T00:00:00"]
    safe_add_rules(context, [repo_rule, key_file_rule, commit_rule])


def branchTest(context):
safe_add_rules(
context,
Expand Down Expand Up @@ -108,4 +124,5 @@ def branchTest(context):
"wantsAWSSecret": wantsAWSSecret,
"branchTest": branchTest,
"wantsLongSecret": wantsLongSecret,
"wantsFixedDateSecret": wantsFixedDateSecret,
}
33 changes: 33 additions & 0 deletions features/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,30 @@ def step_impl(context, branch_toggle, extra_context, secret_toggle, format, engi
run_secret_magpie(context, engines, outformat=format, args=args)


@when(
    "we run secret-magpie-cli in {branch_toggle} branch mode, ignoring commits older than {threshold_date} extra context {extra_context}, secret storing {secret_toggle}, output format {format} and engines: {engines}"
)
def step_impl(
    context,
    branch_toggle,
    threshold_date,
    extra_context,
    secret_toggle,
    format,
    engines,
):
    """Run secret-magpie-cli with flags derived from the step placeholders.

    Each placeholder arrives as text from the feature file:

    * threshold_date: anything other than the literal "None" is passed on
      as --ignore-older-than=<value>.
    * extra_context: "enabled" adds --extra-context.
    * secret_toggle: "disabled" adds --dont-store-secret.
    * branch_toggle: "single" adds --single-branch.

    format and engines are forwarded to run_secret_magpie unchanged.
    """
    # (wanted?, flag) pairs, listed in the order the flags are emitted.
    flag_table = (
        (threshold_date != "None", f"--ignore-older-than={threshold_date}"),
        (extra_context == "enabled", "--extra-context"),
        (secret_toggle == "disabled", "--dont-store-secret"),
        (branch_toggle == "single", "--single-branch"),
    )
    cli_args = [flag for wanted, flag in flag_table if wanted]
    run_secret_magpie(context, engines, outformat=format, args=cli_args)


@then("secret-magpie-cli's output will be")
def step_impl(context):
stdout = context.stdout
Expand Down Expand Up @@ -307,6 +331,15 @@ def __init__(self, rules, dir):
else:
current_repo.index.commit("Commit.")

case "commitdate":
current_repo.git.add(A=True)
if not commit_all:
commit_all = True
if len(rule) > 1:
current_repo.index.commit("Commit.", commit_date=rule[1])
else:
current_repo.index.commit("Commit.")

case "branch":
# If we have content that isn't commit yet
# We should commit it before anything else.
Expand Down
12 changes: 12 additions & 0 deletions features/secret_detection.feature
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,15 @@ Feature: Validate secret detection against various engines.
Scenario: Ensure that we can detect secrets in AzureDevOps organisations
When we run secret-magpie-cli with engines: all
Then there will be 4 secrets detected

@localrepos
@fixture.wantsFixedDateSecret
Scenario: Detect all secrets with fixed dates when we don't ignore secrets
When we run secret-magpie-cli in multi branch mode, ignoring commits older than None extra context disabled, secret storing enabled, output format csv and engines: all
Then there will be 2 secrets detected

@localrepos
@fixture.wantsFixedDateSecret
Scenario: Detect all secrets with fixed dates when we ignore secrets older than 2022-01-01T00:00:00+00:00
When we run secret-magpie-cli in multi branch mode, ignoring commits older than 2022-01-01T00:00:00+00:00 extra context disabled, secret storing enabled, output format csv and engines: all
Then there will be 0 secrets detected
9 changes: 9 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,20 @@
import argparsing
import stats
import output
import datetime
import time

if __name__ == "__main__":
print(argparsing.banner)
args = argparsing.parse_args()
cleanup = not (args.no_cleanup or "filesystem" == args.provider)

# Turn --ignore-older-than (an ISO 8601 string, or None when the option is
# not given) into a POSIX timestamp. It is later compared with each branch's
# committed_date in tasks.get_branches.
threshold_date = None
if args.ignore_older_than is not None:
    # datetime.timestamp() honours an explicit UTC offset in the input.
    # The previous timetuple()/time.mktime() round trip dropped the offset
    # and read the wall-clock fields as local time, shifting the threshold
    # by the host's UTC offset. Values given without an offset are still
    # interpreted as local time, as before.
    threshold_date = datetime.datetime.fromisoformat(
        args.ignore_older_than
    ).timestamp()

tool_list = []
if not args.disable_gitleaks:
tool_list.append(tools.gitleaks)
Expand All @@ -29,6 +37,7 @@
single_branch=args.single_branch,
extra_context=args.extra_context,
cleanup=cleanup,
threshold_date=threshold_date,
)
pool = ThreadPool(args.parallel_repos)
results = pool.imap_unordered(f, repos)
Expand Down
19 changes: 16 additions & 3 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def onerror(func, path, exc_info):
raise


def get_branches(path):
def get_branches(path, threshold_date=None):
r = GitRepo.init(path)

branches = []
Expand All @@ -47,6 +47,14 @@ def get_branches(path):
]
)

if threshold_date != None:
branches = list(
filter(
lambda branch: r.commit(branch).committed_date >= threshold_date,
branches,
)
)

return branches


Expand All @@ -67,15 +75,20 @@ def __repr__(self):


def process_repo(
repo, functions, single_branch=False, extra_context=False, cleanup=True
repo,
functions,
single_branch=False,
extra_context=False,
cleanup=True,
threshold_date=None,
):
out = []
try:
path = repo.clone_repo()
except:
return [ProcessRepoResult(repo, "FAIL", "Could not clone")]
if not single_branch:
branches = get_branches(path)
branches = get_branches(path, threshold_date=threshold_date)
else:
branches = ["HEAD"]
for branch in branches:
Expand Down

0 comments on commit 367e8ac

Please sign in to comment.