Skip to content

Commit

Permalink
fix: safely filter branches
Browse files Browse the repository at this point in the history
chore: appease bandit

chore: appease bandit

fix: always scan head when limiting branches

fix: tidy up limiting

fix: arg parsing

chore: update readme

fix: use branches_t for threshold

chore: readme
  • Loading branch information
SimonGurney committed Jan 18, 2023
1 parent b1ba8bd commit aa5c95a
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 9 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ options:
--disable-trufflehog Scan without trufflehog
--disable-gitleaks Scan without gitleaks
--single-branch Scan only the default branch
--max-branch-count MAX_BRANCH_COUNT
Limit the number of branches scanned per repo; when a repo has more, only the most recently committed branches are scanned (default: 20)
--dont-store-secret Do not store the plaintext secret in the results
--extra-context Output two lines before and after the secret for additional context.
--no-stats Do not output stats summary
Expand All @@ -153,6 +155,7 @@ gitlab:
The access token to use for accessing GitLab.
--gitlab-url GITLAB_URL
URL of the GitLab instance to run against. (default: https://gitlab.com)
bitbucket:
--workspace WORKSPACE
--username USERNAME
Expand Down
7 changes: 7 additions & 0 deletions argparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ def error(self, message):
help="Scan only the default branch",
)

parser.add_argument(
"--max-branch-count",
type=int,
default=20,
help="Limit the number of branches scanned per repo",
)

parser.add_argument(
"--dont-store-secret",
action="store_true",
Expand Down
1 change: 1 addition & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
threshold_date=threshold_date,
validate_https=not args.dont_validate_https,
to_scan_list=to_scan_list,
max_branch_count=args.max_branch_count,
)
pool = ThreadPool(args.parallel_repos)
results = pool.imap_unordered(f, repos)
Expand Down
34 changes: 25 additions & 9 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def onerror(func, path, exc_info):
raise


def get_branches(path, threshold_date=None, single_branch=False):
def get_branches(
path, max_branch_count, repo_name, threshold_date=None, single_branch=False
):
r = GitRepo.init(path)

branches = []
Expand All @@ -53,14 +55,23 @@ def get_branches(path, threshold_date=None, single_branch=False):
if head.is_detached == True and not head.is_remote()
]
)

if threshold_date != None:
branches = list(
filter(
lambda branch: r.commit(branch).committed_date >= threshold_date,
branches,
if threshold_date != None or len(branches) > max_branch_count:
branches_t = []
for branch in branches:
try:
latest_commit = r.commit(branch)
branches_t.append((branch, latest_commit.committed_date))
except: # nosec B112
continue # skip this branch
if threshold_date != None:
branches_t = [(b, v) for b, v in branches_t if v >= threshold_date]
if len(branches_t) > max_branch_count:
print(
f"Repo '{repo_name}' has {len(branches)} branches, only scanning the freshest {max_branch_count}. You can increase this limit"
)
)
branches_t.sort(key=lambda t: t[1], reverse=True)
branches_t = branches_t[0:max_branch_count]
branches = [b for b, v in branches_t]

return branches

Expand Down Expand Up @@ -90,6 +101,7 @@ def process_repo(
threshold_date=None,
validate_https=True,
to_scan_list=None,
max_branch_count=50,
):
if to_scan_list is not None:
print(repo.html_url)
Expand All @@ -103,7 +115,11 @@ def process_repo(
return [ProcessRepoResult(repo, "FAIL", "Could not clone")]

branches = get_branches(
path, threshold_date=threshold_date, single_branch=single_branch
path,
threshold_date=threshold_date,
single_branch=single_branch,
max_branch_count=max_branch_count,
repo_name=repo.name,
)

for branch in branches:
Expand Down

0 comments on commit aa5c95a

Please sign in to comment.