Skip to content

Commit

Permalink
Merge pull request #430 from GitGuardian/alina/cor-769/fix-double-policy-break-in-result
Browse files Browse the repository at this point in the history

fix: same secret should be shown once in the results output for scan command
  • Loading branch information
agateau-gg authored Nov 25, 2022
2 parents f4f5d43 + c278228 commit 8669b23
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 16 deletions.
19 changes: 6 additions & 13 deletions ggshield/scan/repo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import itertools
import os
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
Expand Down Expand Up @@ -73,8 +72,10 @@ def scan_commits_content(
ignored_detectors: Optional[Set[str]] = None,
ignore_known_secrets: bool = False,
) -> ScanCollection: # pragma: no cover
commit_files_tuples = []
try:
commit_files = list(itertools.chain.from_iterable(c.files for c in commits))
commit_files_tuples = [(c, f) for c in commits for f in c.files]

progress_callback(advance=len(commits))
scanner = SecretScanner(
client=client,
Expand All @@ -85,28 +86,20 @@ def scan_commits_content(
ignore_known_secrets=ignore_known_secrets,
)
results = scanner.scan(
commit_files,
[file for _, file in commit_files_tuples],
scan_threads=SCAN_THREADS,
)
except Exception as exc:
results = Results.from_exception(exc)

scans = []
for commit in commits:
concerned_results = [
res
for res in results.results
if any(
res.content == file.document and res.filename == file.filename
for file in commit.files
)
]
for (commit, _), result in zip(commit_files_tuples, results.results):
scans.append(
ScanCollection(
commit.sha or "unknown",
type="commit",
results=Results(
results=concerned_results,
results=[result],
errors=results.errors,
),
optional_header=commit.optional_header,
Expand Down
57 changes: 54 additions & 3 deletions tests/unit/scan/test_scan_repo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from copy import deepcopy
from typing import List
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch

import pytest

from ggshield.scan import Commit, File
from ggshield.scan.repo import get_commits_by_batch
from ggshield.core.utils import Filemode
from ggshield.scan import Commit, File, Result, Results
from ggshield.scan.repo import get_commits_by_batch, scan_commits_content
from tests.unit.conftest import TWO_POLICY_BREAKS


@pytest.mark.parametrize(
Expand Down Expand Up @@ -65,3 +68,51 @@ def test_get_commits_content_by_batch(
assert len(batches) == len(expected_batches)
for (batch, expected_batch) in zip(batches, expected_batches):
assert len(batch) == len(expected_batch)


@patch("ggshield.scan.repo.SecretScanner")
def test_scan_2_commits_same_content(secret_scanner_mock):
    """
    GIVEN two commits, each holding a single file with the same content and filename
    WHEN the mocked scanner reports one result with 2 policy breaks per file
    THEN scan_commits_content produces 2 scans totalling 4 policy breaks
    """
    # Both commits carry an identical file; only the sha differs.
    commits = []
    for sha in ("some_sha_1", "some_sha_2"):
        commit = Commit(sha=sha)
        commit._files = [File(document="document", filename="filename")]
        commits.append(commit)

    # One mocked result per commit file, each with its own copy of the fixture.
    mocked_results = Results(
        results=[
            Result(
                filename="filename",
                content="document",
                filemode=Filemode.NEW,
                scan=deepcopy(TWO_POLICY_BREAKS),
            )
            for _ in commits
        ],
        errors=[],
    )
    secret_scanner_mock.return_value.scan.return_value = mocked_results

    scan_collection = scan_commits_content(
        commits=commits,
        client=MagicMock(),
        cache=MagicMock(),
        matches_ignore=[],
        scan_context=MagicMock(),
        progress_callback=(lambda advance: None),
    )

    assert len(scan_collection.scans) == 2

    all_policy_breaks_count = 0
    for result in scan_collection.get_all_results():
        all_policy_breaks_count += result.scan.policy_break_count
    assert all_policy_breaks_count == 4

0 comments on commit 8669b23

Please sign in to comment.