Skip to content

Commit

Permalink
Add --sensitivity option
Browse files Browse the repository at this point in the history
Fixes #265

Provide more comprehensible alternative for tuning entropy checking.
This is applied consistently across all target character sets, and stated
in a way that is slightly easier to understand ("higher means more
likely to flag a given string").

The older `--b64-entropy-score` and `--hex-entropy-score` options are
marked as deprecated but retained for backwards compatibility (and they
override `--sensitivity` if used together with it).
  • Loading branch information
rscottbailey committed Nov 12, 2021
1 parent ca46e5c commit 42ad8ac
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 67 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Features:
`--scan-filenames/--no-scan-filenames` flag which allows users to enable or disable file name scanning.
* [#254](https://github.com/godaddy/tartufo/pull/260) - Changes the default value of
`--regex/--no-regex` to True.
* [#265](https://github.com/godaddy/tartufo/issues/265) - Adds new `--sensitivity`
option which provides a friendlier way to adjust entropy detection sensitivity.
This replaces `--b64-entropy-score` and `--hex-entropy-score`, which now are
marked as deprecated.

Misc:

Expand Down
32 changes: 20 additions & 12 deletions tartufo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,25 +200,33 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
show_default=True,
help="Enable or disable timestamps in logging messages.",
)
@click.option(
"--sensitivity",
type=click.IntRange(0, 100),
default=25,
show_default=True,
help="""Modify entropy detection sensitivity. This is expressed as the probability
(between 0 and 100) a given string is *not* random and should be ignored.
A higher value increases the likelihood a string will be considered suspicious
and result in a finding.""",
)
@click.option(
"-b64",
"--b64-entropy-score",
default=4.5,
show_default=True,
help="Modify the base64 entropy score. If a value greater than the default is "
"specified, tartufo lists higher entropy base64 strings (longer or more randomized "
"strings). A lower value lists lower entropy base64 strings (shorter or less "
"randomized strings).",
help="""[DEPRECATED] Use `--sensitivity`. Modify the base64 entropy score. If
a value greater than the default (4.5 in a range of 0.0-6.0) is specified,
tartufo lists higher entropy base64 strings (longer or more randomized strings.
A lower value lists lower entropy base64 strings (shorter or less randomized
strings).""",
)
@click.option(
"-hex",
"--hex-entropy-score",
default=3.0,
show_default=True,
help="Modify the hexadecimal entropy score. If a value greater than the default is "
"specified, tartufo lists higher entropy hexadecimal strings (longer or more randomized "
"strings). A lower value lists lower entropy hexadecimal strings (shorter or less "
"randomized strings).",
help="""[DEPRECATED] Use `--sensitivity`. Modify the hexadecimal entropy score.
If a value greater than the default (3.0 in a range of 0.0-4.0) is specified,
tartufo lists higher entropy hexadecimal strings (longer or more randomized
strings). A lower value lists lower entropy hexadecimal strings (shorter or less
randomized strings).""",
)
# The first positional argument here would be a hard-coded version, hence the `None`
@click.version_option(None, "-V", "--version")
Expand Down
46 changes: 39 additions & 7 deletions tartufo/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,46 @@ class ScannerBase(abc.ABC): # pylint: disable=too-many-instance-attributes
global_options: types.GlobalOptions
logger: logging.Logger
_scan_lock: threading.Lock = threading.Lock()
_hex_entropy_score: float = 3.0
_b64_entropy_score: float = 4.5

def __init__(self, options: types.GlobalOptions) -> None:
self.global_options = options
self.logger = logging.getLogger(__name__)

# entropy_score is a probability (between 0.0 and 1.0) that a given string
# is random. Strings that are at least this likely to be random will result
# in findings. We convert this from "sensitivity" (0-100) which is inverted
# so that intuitively "more sensitive" means "more likely to flag a given
# string as suspicious."
if self.global_options.sensitivity is None:
sensitivity = 25
else:
sensitivity = self.global_options.sensitivity
entropy_score = float(100 - sensitivity) / 100.0

# We now compute an effective score for each type of entropy string by
# multiplying by the number of bits expressed in each character of the
# string's character set:
# hex character 0-f is 16 digits = 2^4 digits = 4 bits/character
# base64 represents 24 bits using 4 characters = 6 bits/character
self._hex_entropy_score = entropy_score * 4.0
self._b64_entropy_score = entropy_score * 6.0

# For backwards compatibility, allow the caller to manipulate each of
# these representation-specific scores directly (but complain about it).
if self.global_options.hex_entropy_score:
warnings.warn(
"--hex-entropy-score is deprecated. Use --sensitivity instead."
)
self._hex_entropy_score = self.global_options.hex_entropy_score

if self.global_options.b64_entropy_score:
warnings.warn(
"--b64-entropy-score is deprecated. Use --sensitivity instead."
)
self._b64_entropy_score = self.global_options.b64_entropy_score

@property
def completed(self) -> bool:
"""Return True if scan has completed
Expand Down Expand Up @@ -398,22 +433,19 @@ def scan(self) -> Generator[Issue, None, None]:
if self.global_options.entropy:
for issue in self.scan_entropy(
chunk,
self.global_options.b64_entropy_score,
self.global_options.hex_entropy_score,
):
self._issues.append(issue)
yield issue
self._completed = True
self.logger.info("Found %d issues.", len(self._issues))

def scan_entropy(
self, chunk: types.Chunk, b64_entropy_score: float, hex_entropy_score: float
self,
chunk: types.Chunk,
) -> Generator[Issue, None, None]:
"""Scan a chunk of data for apparent high entropy.
:param chunk: The chunk of data to be scanned
:param b64_entropy_score: Base64 entropy score
:param hex_entropy_score: Hexadecimal entropy score
"""

for line in chunk.contents.split("\n"):
Expand All @@ -423,12 +455,12 @@ def scan_entropy(

for string in b64_strings:
yield from self.evaluate_entropy_string(
chunk, line, string, BASE64_CHARS, b64_entropy_score
chunk, line, string, BASE64_CHARS, self._b64_entropy_score
)

for string in hex_strings:
yield from self.evaluate_entropy_string(
chunk, line, string, HEX_CHARS, hex_entropy_score
chunk, line, string, HEX_CHARS, self._hex_entropy_score
)

def evaluate_entropy_string(
Expand Down
2 changes: 2 additions & 0 deletions tartufo/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class GlobalOptions:
"output_format",
"b64_entropy_score",
"hex_entropy_score",
"sensitivity",
)
rules: Tuple[TextIO, ...]
default_regexes: bool
Expand All @@ -46,6 +47,7 @@ class GlobalOptions:
output_format: Optional[str]
b64_entropy_score: float
hex_entropy_score: float
sensitivity: int


@dataclass
Expand Down
76 changes: 28 additions & 48 deletions tests/test_base_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ def test_scan_iterates_through_all_chunks(self, mock_entropy: mock.MagicMock):
list(test_scanner.scan())
mock_entropy.assert_has_calls(
(
mock.call("foo", 4.5, 3),
mock.call("bar", 4.5, 3),
mock.call("baz", 4.5, 3),
mock.call("foo"),
mock.call("bar"),
mock.call("baz"),
),
any_order=True,
)
Expand Down Expand Up @@ -431,11 +431,7 @@ def test_scan_entropy_find_b64_strings_for_every_word_in_diff(
self, mock_strings: mock.MagicMock
):
mock_strings.return_value = []
b64_entropy_score = 4.5
hex_entropy_score = 3
list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
list(self.scanner.scan_entropy(self.chunk))
mock_strings.assert_has_calls(
(
mock.call("foo", scanner.BASE64_CHARS),
Expand All @@ -458,11 +454,7 @@ def test_issues_are_not_created_for_b64_string_excluded_signatures(
):
mock_strings.side_effect = (["foo"], [], [], [], [], [])
mock_signature.return_value = True
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
mock_calculate.assert_not_called()
self.assertEqual(issues, [])

Expand All @@ -477,11 +469,7 @@ def test_issues_are_not_created_for_hex_string_excluded_signatures(
):
mock_strings.side_effect = ([], ["foo"], [], [], [], [])
mock_signature.return_value = True
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
mock_calculate.assert_not_called()
self.assertEqual(issues, [])

Expand All @@ -497,11 +485,7 @@ def test_issues_are_created_for_high_entropy_b64_strings(
mock_strings.side_effect = (["foo"], [], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 9.0
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
self.assertEqual(len(issues), 1)
self.assertEqual(issues[0].issue_type, types.IssueType.Entropy)
self.assertEqual(issues[0].matched_string, "foo")
Expand All @@ -518,11 +502,7 @@ def test_issues_are_created_for_high_entropy_hex_strings(
mock_strings.side_effect = ([], ["foo"], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 9.0
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
self.assertEqual(len(issues), 1)
self.assertEqual(issues[0].issue_type, types.IssueType.Entropy)
self.assertEqual(issues[0].matched_string, "foo")
Expand All @@ -542,11 +522,7 @@ def test_issues_are_not_created_for_high_entropy_hex_strings_given_entropy_is_ex
mock_entropy.return_value = True
mock_signature.return_value = False
mock_calculate.return_value = 9.0
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
self.assertEqual(len(issues), 0)

@mock.patch("tartufo.scanner.ScannerBase.calculate_entropy")
Expand All @@ -564,11 +540,7 @@ def test_issues_are_not_created_for_low_entropy_b64_strings_given_entropy_is_exc
mock_entropy.return_value = True
mock_signature.return_value = False
mock_calculate.return_value = 9.0
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
self.assertEqual(len(issues), 0)

@mock.patch("tartufo.scanner.ScannerBase.calculate_entropy")
Expand All @@ -583,11 +555,7 @@ def test_issues_are_not_created_for_low_entropy_b64_strings(
mock_strings.side_effect = (["foo"], [], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 1.0
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
self.assertEqual(len(issues), 0)

@mock.patch("tartufo.scanner.ScannerBase.calculate_entropy")
Expand All @@ -602,13 +570,25 @@ def test_issues_are_not_created_for_low_entropy_hex_strings(
mock_strings.side_effect = ([], ["foo"], [], [], [], [])
mock_signature.return_value = False
mock_calculate.return_value = 1.0
b64_entropy_score = 4.5
hex_entropy_score = 3
issues = list(
self.scanner.scan_entropy(self.chunk, b64_entropy_score, hex_entropy_score)
)
issues = list(self.scanner.scan_entropy(self.chunk))
self.assertEqual(len(issues), 0)

def test_sensitivity_low_end_calculation(self):
self.options.sensitivity = 0
scanner = TestScanner(self.options)

# 0% sensitivity means entropy rate must equal bit rate
self.assertEqual(scanner._b64_entropy_score, 6.0)
self.assertEqual(scanner._hex_entropy_score, 4.0)

def test_sensitivity_high_end_calculation(self):
self.options.sensitivity = 100
scanner = TestScanner(self.options)

# 100% sensitivity means required entropy rate will be zero
self.assertEqual(scanner._b64_entropy_score, 0.0)
self.assertEqual(scanner._hex_entropy_score, 0.0)


if __name__ == "__main__":
unittest.main()

0 comments on commit 42ad8ac

Please sign in to comment.