Skip to content

Commit

Permalink
feat: Adds an option to ignore old branches
Browse files Browse the repository at this point in the history
* feat: Adds a command line option to ignore all branches whose last commit date is older than a given threshold, to reduce the time taken to scan repositories.
  • Loading branch information
alexbrozych committed Jan 11, 2023
1 parent b7d8093 commit 9b33100
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 16 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ options:
--dont-store-secret Do not store the plaintext secret in the results
--extra-context Output two lines before and after the secret for additional context.
--no-stats Do not output stats summary
--ignore-branches-older-than IGNORE_BRANCHES_OLDER_THAN
Ignore branches whose last commit date is before this value. Format is Python's expected ISO format, e.g. 2020-01-01T00:00:00+00:00
github/gitlab/azuredevops:
--org ORG Organisation name to target
--pat PAT Personal Access Token for API access and cloning
Expand Down
7 changes: 7 additions & 0 deletions argparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ def error(self, message):
help="Do not output stats summary",
)

# Optional cut-off date. The value is kept as a raw string here (type=str)
# and defaults to None, which callers treat as "no age filter".
parser.add_argument(
    "--ignore-branches-older-than",
    type=str,
    default=None,
    help="Ignore branches whose last commit date is before this value. Format is Python's expected ISO format, e.g. 2020-01-01T00:00:00+00:00",
)


def parse_args():
args = parser.parse_args()
Expand Down
17 changes: 17 additions & 0 deletions features/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,22 @@ def wantsLongSecret(context):
)


@fixture
def wantsFixedDateSecret(context):
    """Register rules for an ``aws`` repo whose commit carries a fixed date.

    The rule set declares one repository, one file named ``aws_key`` holding
    an AWS-style access key id / secret access key pair, and a ``commitdate``
    rule pinned to 2020-01-01T00:00:00, so scenarios can filter on a known
    commit age.
    """
    # File rule: the first entry names the file, the rest are its content.
    aws_key_file = [
        "file aws_key",
        "aws_access_key_id = AKIAYVP4CIPPERUVIFXG\n",
        "aws_secret_access_key = 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",
    ]
    fixed_date_rules = [
        ["repo aws"],
        aws_key_file,
        ["commitdate 2020-01-01T00:00:00"],
    ]
    safe_add_rules(context, fixed_date_rules)


def branchTest(context):
safe_add_rules(
context,
Expand Down Expand Up @@ -108,4 +124,5 @@ def branchTest(context):
"wantsAWSSecret": wantsAWSSecret,
"branchTest": branchTest,
"wantsLongSecret": wantsLongSecret,
"wantsFixedDateSecret": wantsFixedDateSecret,
}
33 changes: 33 additions & 0 deletions features/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,30 @@ def step_impl(context, branch_toggle, extra_context, secret_toggle, format, engi
run_secret_magpie(context, engines, outformat=format, args=args)


@when(
    "we run secret-magpie-cli in {branch_toggle} branch mode, ignoring commits older than {threshold_date} extra context {extra_context}, secret storing {secret_toggle}, output format {format} and engines: {engines}"
)
def step_impl(
    context,
    branch_toggle,
    threshold_date,
    extra_context,
    secret_toggle,
    format,
    engines,
):
    """Run secret-magpie-cli with flags derived from the step's placeholders.

    Each placeholder arrives as text from the step sentence and switches on
    at most one command-line flag:

    * ``threshold_date`` other than the literal ``"None"`` adds
      ``--ignore-branches-older-than=<value>``.
    * ``extra_context == "enabled"`` adds ``--extra-context``.
    * ``secret_toggle == "disabled"`` adds ``--dont-store-secret``.
    * ``branch_toggle == "single"`` adds ``--single-branch``.

    ``format`` and ``engines`` are passed straight to ``run_secret_magpie``.
    """
    # (enabled?, flag) pairs, listed in the order the flags are emitted.
    optional_flags = (
        (threshold_date != "None", f"--ignore-branches-older-than={threshold_date}"),
        (extra_context == "enabled", "--extra-context"),
        (secret_toggle == "disabled", "--dont-store-secret"),
        (branch_toggle == "single", "--single-branch"),
    )
    cli_args = [flag for wanted, flag in optional_flags if wanted]
    run_secret_magpie(context, engines, outformat=format, args=cli_args)


@then("secret-magpie-cli's output will be")
def step_impl(context):
stdout = context.stdout
Expand Down Expand Up @@ -307,6 +331,15 @@ def __init__(self, rules, dir):
else:
current_repo.index.commit("Commit.")

case "commitdate":
current_repo.git.add(A=True)
if not commit_all:
commit_all = True
if len(rule) > 1:
current_repo.index.commit("Commit.", commit_date=rule[1])
else:
current_repo.index.commit("Commit.")

case "branch":
# If we have content that isn't commit yet
# We should commit it before anything else.
Expand Down
17 changes: 17 additions & 0 deletions features/secret_detection.feature
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,20 @@ Feature: Validate secret detection against various engines.
Scenario: Ensure that we can detect secrets in AzureDevOps organisations
When we run secret-magpie-cli with engines: all
Then there will be 4 secrets detected

@localrepos
@fixture.wantsFixedDateSecret
Scenario: Detect all secrets with fixed dates when we don't ignore secrets
When we run secret-magpie-cli in multi branch mode, ignoring commits older than None extra context disabled, secret storing enabled, output format csv and engines: all
Then there will be 2 secrets detected

@localrepos
@fixture.wantsFixedDateSecret
Scenario Outline: Detect no secrets with fixed dates when we ignore secrets older than 2022-01-01T00:00:00+00:00 in <mode> branch mode.
When we run secret-magpie-cli in <mode> branch mode, ignoring commits older than 2022-01-01T00:00:00+00:00 extra context disabled, secret storing enabled, output format csv and engines: all
Then there will be 0 secrets detected

Examples:
| mode |
| single |
| multi |
9 changes: 9 additions & 0 deletions features/validate_output.feature
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,12 @@ Feature: Validate that the results files produced by secret-magpie-cli is of val
Scenario: Ensure that the date field within the repo is parseable in ISO8601 format.
When we run secret-magpie-cli with engines: all
Then the date column of results.csv will be ISO8601 format

@localrepos
@wantsAWSSecret
Scenario: Ensure that secret-magpie-cli gives the expected error when we run it with an invalid threshold date
When we run secret-magpie-cli in multi branch mode, ignoring commits older than invaliddate extra context disabled, secret storing enabled, output format csv and engines: all
Then secret-magpie-cli's output will be
"""
ERROR: Invalid ISO format string.
"""
15 changes: 15 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,26 @@
import argparsing
import stats
import output
import datetime
import time

if __name__ == "__main__":
print(argparsing.banner)
args = argparsing.parse_args()
cleanup = not (args.no_cleanup or "filesystem" == args.provider)

# Convert the optional --ignore-branches-older-than value into a POSIX
# timestamp (seconds since the epoch). None means "do not filter by age".
threshold_date = None
if args.ignore_branches_older_than is not None:
    try:
        # datetime.timestamp() honours a UTC offset embedded in the string
        # (e.g. "+00:00") and treats a naive value as local time. The former
        # time.mktime(dt.timetuple()) round-trip dropped the offset, so an
        # offset-qualified threshold was shifted by the machine's local
        # UTC offset.
        threshold_date = datetime.datetime.fromisoformat(
            args.ignore_branches_older_than
        ).timestamp()
    except ValueError:
        # Raised by fromisoformat() for strings that are not valid ISO dates.
        print("ERROR: Invalid ISO format string.")
        sys.exit(1)

tool_list = []
if not args.disable_gitleaks:
tool_list.append(tools.gitleaks)
Expand All @@ -29,6 +43,7 @@
single_branch=args.single_branch,
extra_context=args.extra_context,
cleanup=cleanup,
threshold_date=threshold_date,
)
pool = ThreadPool(args.parallel_repos)
results = pool.imap_unordered(f, repos)
Expand Down
51 changes: 36 additions & 15 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,38 @@ def onerror(func, path, exc_info):
raise


def get_branches(path, threshold_date=None, single_branch=False):
    """Return the names of the refs in the repository at *path* to scan.

    Args:
        path: Location of the repository; opened with ``GitRepo.init``.
        threshold_date: Optional cut-off compared against the
            ``committed_date`` of each ref's commit. Refs whose commit is
            older than this value are dropped; ``None`` disables the filter.
            NOTE(review): presumably seconds since the epoch, matching what
            the entry point computes from the CLI option -- confirm.
        single_branch: When true, only ``"HEAD"`` is considered and the
            remote/local branch enumeration is skipped.

    Returns:
        A list of ref names: ``["HEAD"]`` in single-branch mode, otherwise
        the first remote's refs (prefixed with ``"remotes/"``) followed by
        local heads, restricted by ``threshold_date`` when it is given.
    """
    r = GitRepo.init(path)

    if single_branch:
        branches = ["HEAD"]
    else:
        branches = []

        # Only the first configured remote is inspected. Refs that are not
        # detached are left out.
        if len(r.remotes) > 0:
            branches.extend(
                "remotes/" + ref.name
                for ref in r.remotes[0].refs
                if ref.is_detached
            )

        branches.extend(
            head.name
            for head in r.heads
            if head.is_detached and not head.is_remote()
        )

    # The age filter applies in both modes, so HEAD can be filtered out too.
    if threshold_date is not None:
        branches = [
            branch
            for branch in branches
            if r.commit(branch).committed_date >= threshold_date
        ]

    return branches

Expand All @@ -67,17 +82,23 @@ def __repr__(self):


def process_repo(
repo, functions, single_branch=False, extra_context=False, cleanup=True
repo,
functions,
single_branch=False,
extra_context=False,
cleanup=True,
threshold_date=None,
):
out = []
try:
path = repo.clone_repo()
except:
return [ProcessRepoResult(repo, "FAIL", "Could not clone")]
if not single_branch:
branches = get_branches(path)
else:
branches = ["HEAD"]

branches = get_branches(
path, threshold_date=threshold_date, single_branch=single_branch
)

for branch in branches:
for function in functions:
try:
Expand Down

0 comments on commit 9b33100

Please sign in to comment.