Skip to content

Commit

Permalink
Exclusion reasons (#286)
Browse files Browse the repository at this point in the history
* Supporting new format of excluding signatures

* Creating a property for combining exclusions and updating tests

* Fixing unit test failures

* Adding an entry into changelog and updating messages

* Updating comment

* Addressing review comments
  • Loading branch information
mayuriesha authored Nov 29, 2021
1 parent 8348951 commit 06a3833
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 27 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Features:
* [#268](https://github.com/godaddy/tartufo/issues/268) - Adds a new
`--recurse / --no-recurse` flag which allows users to recursively scan the entire directory or just
the root directory
* [#202](https://github.com/godaddy/tartufo/issues/202) - Supports new format of exclusions in config file
with the ability to specify the reason along with exclusion

v3.0.0-alpha.1 - 11 November 2021
---------------------------------
Expand Down
13 changes: 10 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,18 @@ exclude-signatures = [
"d039c652f27c4d42026c5b5c9be31bfe368b283b24248e98d92f131272580053",

# test secrets
"d26b223efb2087d4a3db7b3ff0f65362372c46190fd74b8061db8099f4a2ee60", # tests/data/scan_folder/donotscan.txt
"94c45f9acae0551a2438fa8932fac466e903fd9a09bd2e11897198b133937ccd", # tests/data/scan_folder/test.txt
"6c70b4a712ada69f2a6c919edc783cf485b6637e3c5429de376ef1a451a042e4", # tests/data/scan_folder/donotscan.txt
"357841766ca69979608e4e7fb1da2addeafe6d15cade15466c39d903cc0488a8", # tests/data/scan_folder/test.txt
"743b893e30777d9c4e28d3d341ca4ba1c2541a9c62b497dece75c2a64420e770", # tests/test_folder_scanner.py
"9d1b278fa384c9dc58d607130d97740910309d13b29f13f7ed6348738ea4f32c", # tests/data/scan_folder/scan_sub_folder/sub_folder_test.txt
"358a1ae040ab71b3aafeb4318e5d9e318ff1b681c5bf0e99d381584a33ee7833", # tests/data/scan_folder/scan_sub_folder/sub_folder_test.txt
]

exclude-findings = [
{signature="9d1b278fa384c9dc58d607130d97740910309d13b29f13f7ed6348738ea4f32c", reason="High entropy string added in test_folder_scanner file on 16th November 2021"},
{signature="9f675d4d7f43e484e07fb199a1e7ec372ef204631c1f4ec054acc3119fbde4cf", reason="High entropy string added in build_and_publish_docker file on 13th June 2019"},
{signature="88152f1d10077f417ee5aea5fa86adaa5b74d3f2de2e87ce4b4aa5cd79c7b2f5", reason="High entropy string added in nothing file on 30th December 2016"},
]

json = false
regex = true
repo-path = "."
Expand Down
15 changes: 14 additions & 1 deletion tartufo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,25 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
"-e",
"--exclude-signatures",
multiple=True,
help="Specify signatures of matches that you explicitly want to exclude "
help="[DEPRECATED] Use the exclude-findings config option instead. "
"Specify signatures of matches that you explicitly want to exclude "
"from the scan, and mark as okay. These signatures are generated during "
"the scan process, and reported out with each individual match. This "
"option can be specified multiple times, to exclude as many signatures as "
"you would like.",
)
@click.option(
"-ef",
"--exclude-findings",
hidden=True,
multiple=True,
help="Specify signatures of matches that you explicitly want to exclude "
"from the scan along with the reason, and mark as okay. These signatures "
"are generated during the scan process, and reported out with each"
"individual match. This option can be specified multiple times, "
"to exclude as many signatures as you would like. "
"{signature='signature', reason='The reason of excluding the signature'}",
)
@click.option(
"-od",
"--output-dir",
Expand Down
46 changes: 35 additions & 11 deletions tartufo/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def __bytes__(self) -> bytes:
return self.__str__().encode("utf8")


# pylint: disable=too-many-public-methods
class ScannerBase(abc.ABC): # pylint: disable=too-many-instance-attributes
"""Provide the base, generic functionality needed by all scanners.
Expand All @@ -149,6 +150,8 @@ class ScannerBase(abc.ABC): # pylint: disable=too-many-instance-attributes
global_options: types.GlobalOptions
logger: logging.Logger
_scan_lock: threading.Lock = threading.Lock()
_excluded_findings: Tuple[str, ...] = ()
_config_data: MutableMapping[str, Any] = {}

def __init__(self, options: types.GlobalOptions) -> None:
self.global_options = options
Expand Down Expand Up @@ -336,6 +339,35 @@ def should_scan(self, file_path: str):
return False
return True

@property
def config_data(self):
return self._config_data

@config_data.setter
def config_data(self, data: MutableMapping[str, Any]) -> None:
self._config_data = data

@cached_property
def excluded_findings(self) -> tuple:
configured_signatures = []
signatures = self.config_data.get("exclude_signatures", None)
if signatures:
warnings.warn(
"--exclude-signatures has been deprecated and will be removed in a future version. "
"Make sure all the exclusions are moved to exclude-findings section with new format. Example: "
"exclude-findings = [{signature='signature', reason='The reason of excluding the signature'}]",
DeprecationWarning,
)
configured_signatures.extend(signatures)
findings = self.config_data.get("exclude_findings", None)
if findings:
configured_signatures.extend([finding["signature"] for finding in findings])

self._excluded_findings = tuple(
set(self.global_options.exclude_signatures + tuple(configured_signatures))
)
return self._excluded_findings

def signature_is_excluded(self, blob: str, file_path: str) -> bool:
"""Find whether the signature of some data has been excluded in configuration.
Expand All @@ -344,9 +376,8 @@ def signature_is_excluded(self, blob: str, file_path: str) -> bool:
"""
return (
blob
in self.global_options.exclude_signatures # Signatures themselves pop up as entropy matches
or util.generate_signature(blob, file_path)
in self.global_options.exclude_signatures
in self.excluded_findings # Signatures themselves pop up as entropy matches
or util.generate_signature(blob, file_path) in self.excluded_findings
)

@staticmethod
Expand Down Expand Up @@ -660,14 +691,7 @@ def load_repo(self, repo_path: str) -> pygit2.Repository:
except (FileNotFoundError, types.ConfigException):
config_file = None
if config_file and config_file != self.global_options.config:
signatures = data.get("exclude_signatures", None)
if signatures:
self.global_options.exclude_signatures = tuple(
set(self.global_options.exclude_signatures + tuple(signatures))
)
entropy_patterns = data.get("exclude_entropy_patterns", None)
if entropy_patterns:
self.global_options.exclude_entropy_patterns += tuple(entropy_patterns)
self.config_data = data
include_patterns = list(data.get("include_path_patterns", ()))
repo_include_file = data.get("include_paths", None)
if repo_include_file:
Expand Down
2 changes: 2 additions & 0 deletions tartufo/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class GlobalOptions:
"exclude_path_patterns",
"exclude_entropy_patterns",
"exclude_signatures",
"exclude_findings",
"output_dir",
"git_rules_repo",
"git_rules_files",
Expand All @@ -65,6 +66,7 @@ class GlobalOptions:
exclude_path_patterns: Tuple[str, ...]
exclude_entropy_patterns: Tuple[Dict[str, str], ...]
exclude_signatures: Tuple[str, ...]
exclude_findings: Tuple[Dict[str, str], ...]
output_dir: Optional[str]
git_rules_repo: Optional[str]
git_rules_files: Tuple[str, ...]
Expand Down
17 changes: 14 additions & 3 deletions tests/test_base_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,23 +266,34 @@ def test_regex_rules_are_computed_when_first_accessed(
class SignatureTests(ScannerTestCase):
@mock.patch("tartufo.util.generate_signature")
def test_matched_signatures_are_excluded(self, mock_signature: mock.MagicMock):
self.options.exclude_signatures = ("foo",)
mock_signature.return_value = "foo"
test_scanner = TestScanner(self.options)
self.options.exclude_signatures = ()
test_scanner.config_data = {
"exclude_findings": [{"signature": "foo", "reason": "foo finding"}]
}
self.assertTrue(test_scanner.signature_is_excluded("bar", "blah"))

@mock.patch("tartufo.util.generate_signature")
def test_unmatched_signatures_are_not_excluded(
self, mock_signature: mock.MagicMock
):
self.options.exclude_signatures = ("foo",)
mock_signature.return_value = "bar"
test_scanner = TestScanner(self.options)
self.options.exclude_signatures = ()
test_scanner.config_data = {
"exclude_findings": [{"signature": "foo", "reason": "foo finding"}]
}
self.assertFalse(test_scanner.signature_is_excluded("blah", "stuff"))

def test_signature_found_as_scan_match_is_excluded(self):
self.options.exclude_signatures = ("ford_prefect",)
test_scanner = TestScanner(self.options)
self.options.exclude_signatures = ()
test_scanner.config_data = {
"exclude_findings": [
{"signature": "ford_prefect", "reason": "ford_prefect finding"}
]
}
self.assertTrue(test_scanner.signature_is_excluded("ford_prefect", "/earth"))


Expand Down
11 changes: 4 additions & 7 deletions tests/test_folder_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@ def test_scan_should_detect_entropy_and_not_binary(self):
self.global_options.entropy = True
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()
self.global_options.exclude_path_patterns = [r"donotscan\.txt"]

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)
issues = list(test_scanner.scan())

self.assertEqual(2, len(issues))
actual_issues = []
actual_issues = [issue.matched_string for issue in issues]
self.assertIn("KQ0I97OBuPlGB9yPRxoSxnX52zE=", actual_issues)
self.assertIn(
Expand All @@ -40,7 +39,7 @@ def test_scan_should_raise_click_error_on_file_permissions_issues(self):
folder_path = pathlib.Path(__file__).parent / "data" / "scan_folder"
recurse = True
self.global_options.entropy = True
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)

Expand All @@ -52,15 +51,14 @@ def test_scan_all_the_files_recursively(self):
folder_path = pathlib.Path(__file__).parent / "data" / "scan_folder"
recurse = True
self.global_options.entropy = True
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)
issues = list(test_scanner.scan())

self.assertEqual(3, len(issues))
actual_issues = []
actual_issues = [issue.matched_string for issue in issues]
self.assertEqual(2, actual_issues.count("KQ0I97OBuPlGB9yPRxoSxnX52zE="))
self.assertIn(
Expand All @@ -72,15 +70,14 @@ def test_scan_only_root_level_files(self):
folder_path = pathlib.Path(__file__).parent / "data" / "scan_folder"
recurse = False
self.global_options.entropy = True
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)
issues = list(test_scanner.scan())

self.assertEqual(2, len(issues))
actual_issues = []
actual_issues = [issue.matched_string for issue in issues]
self.assertEqual(2, actual_issues.count("KQ0I97OBuPlGB9yPRxoSxnX52zE="))

Expand Down
7 changes: 5 additions & 2 deletions tests/test_git_repo_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,17 @@ def test_extra_exclusions_get_added(self, mock_load: mock.MagicMock):
def test_extra_signatures_get_added(self, mock_load: mock.MagicMock):
mock_load.return_value = (
self.data_dir / "pyproject.toml",
{"exclude_signatures": ["foo", "bar"]},
{
"exclude_signatures": ["foo", "bar"],
"exclude_findings": [{"signature": "foo/bar", "reason": "test reason"}],
},
)
test_scanner = scanner.GitRepoScanner(
self.global_options, self.git_options, str(self.data_dir)
)
test_scanner.load_repo("../tartufo")
self.assertEqual(
sorted(test_scanner.global_options.exclude_signatures), ["bar", "foo"]
sorted(test_scanner.excluded_findings), ["bar", "foo", "foo/bar"]
)


Expand Down

0 comments on commit 06a3833

Please sign in to comment.