Skip to content

Commit

Permalink
Review feedback tuneups
Browse files Browse the repository at this point in the history
* Consolidate common code for entropy limit back into a single method,
  and rework properties related to it so they are cleaner.
* Invert sensitivity scale; adjust math and doc to match. It's still
  weird but aligns more closely to the underlying entropy metric.
  • Loading branch information
rscottbailey committed Nov 12, 2021
1 parent 88f5c3e commit 61497ac
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 64 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ Options:
--entropy-sensitivity INTEGER RANGE
Modify entropy detection sensitivity. This
is expressed on a scale of 0 to 100,
where 0 means "totally random" and 100 means
"totally nonrandom". Increasing the
where 0 means "totally nonrandom" and 100
means "totally random". Decreasing the
scanner's sensitivity increases the
likelihood that a given string will be
identified as suspicious. [default: 25]
identified as suspicious. [default: 75]

-b64, --b64-entropy-score TEXT [DEPRECATED] Use `--entropy-sensitivity`.
Modify the base64 entropy score. If a value
Expand Down
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ sphinx-click = {version = "^2.5.0", optional = true}
sphinx-rtd-theme = {version = "^0.5.0", optional = true}
sphinxcontrib-spelling = {version = "^5.4.0", optional = true}
tomlkit = "^0.7.2"
cached-property = "^1.5.2"

[tool.poetry.dev-dependencies]
black = {version = "21.5b2", allow-prereleases = true, markers = "platform_python_implementation == 'CPython'"}
Expand Down
6 changes: 3 additions & 3 deletions tartufo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,11 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
@click.option(
"--entropy-sensitivity",
type=click.IntRange(0, 100),
default=25,
default=75,
show_default=True,
help="""Modify entropy detection sensitivity. This is expressed as on a scale
of 0 to 100, where 0 means "totally random" and 100 means "totally nonrandom".
Increasing the scanner's sensitivity increases the likelihood that a given
of 0 to 100, where 0 means "totally nonrandom" and 100 means "totally random".
Decreasing the scanner's sensitivity increases the likelihood that a given
string will be identified as suspicious.""",
)
@click.option(
Expand Down
79 changes: 27 additions & 52 deletions tartufo/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
)
import warnings

from cached_property import cached_property
import click
import git

import pygit2

from tartufo import config, types, util
Expand Down Expand Up @@ -147,25 +147,22 @@ def __init__(self, options: types.GlobalOptions) -> None:
self.global_options = options
self.logger = logging.getLogger(__name__)

@lru_cache(maxsize=None)
def compute_hex_entropy_limit(self) -> float:
"""Determine low limit for suspicious hexadecimal encodings"""

# entropy_score is a probability (between 0.0 and 1.0) that a given string
# is random. Strings that are at least this likely to be random will result
# in findings. We convert this from "sensitivity" (0-100) which is inverted
# so that intuitively "more sensitive" means "more likely to flag a given
# string as suspicious."
def compute_scaled_entropy_limit(self, maximum_bitrate: float) -> float:
"""Determine low entropy cutoff for specified bitrate
:param maximum_bitrate: How many bits does each character represent?
:returns: Entropy detection threshold scaled to the input bitrate
"""

if self.global_options.entropy_sensitivity is None:
sensitivity = 25
sensitivity = 75
else:
sensitivity = self.global_options.entropy_sensitivity
entropy_score = float(100 - sensitivity) / 100.0
return float(sensitivity) / 100.0 * maximum_bitrate

# Each hexadecimal digit represents a 4-bit number, so we want to scale
# the base score by this amount to account for the efficiency of the
# string representation we're examining.
hex_entropy_score = entropy_score * 4.0
@cached_property
def hex_entropy_limit(self) -> float:
"""Returns low entropy limit for suspicious hexadecimal encodings"""

# For backwards compatibility, allow the caller to manipulate this score
# directly (but complain about it).
Expand All @@ -174,36 +171,16 @@ def compute_hex_entropy_limit(self) -> float:
"--hex-entropy-score is deprecated. Use --entropy-sensitivity instead.",
DeprecationWarning,
)
hex_entropy_score = self.global_options.hex_entropy_score
return self.global_options.hex_entropy_score

return hex_entropy_score

@property
def hex_entropy_limit(self) -> float:
"""Returns low limit for suspicious hexadecimal encodings"""

return self.compute_hex_entropy_limit()

@lru_cache(maxsize=None)
def compute_b64_entropy_limit(self) -> float:
"""Returns low limit for suspicious base64 encodings"""

# entropy_score is a probability (between 0.0 and 1.0) that a given string
# is random. Strings that are at least this likely to be random will result
# in findings. We convert this from "sensitivity" (0-100) which is inverted
# so that intuitively "more sensitive" means "more likely to flag a given
# string as suspicious."
if self.global_options.entropy_sensitivity is None:
sensitivity = 25
else:
sensitivity = self.global_options.entropy_sensitivity
entropy_score = float(100 - sensitivity) / 100.0
# Each hexadecimal digit represents a 4-bit number, so we want to scale
# the base score by this amount to account for the efficiency of the
# string representation we're examining.
return self.compute_scaled_entropy_limit(4.0)

# Each 4-character base64 group represents 3 8-bit bytes, i.e. an effective
# bit rate of 24/4 = 6 bits per character. We want to scale the base score
# by this amount to account for the efficiency of the string representation
# we're examining.
b64_entropy_score = entropy_score * 6.0
@cached_property
def b64_entropy_limit(self) -> float:
"""Returns low entropy limit for suspicious base64 encodings"""

# For backwards compatibility, allow the caller to manipulate this score
# directly (but complain about it).
Expand All @@ -212,15 +189,13 @@ def compute_b64_entropy_limit(self) -> float:
"--b64-entropy-score is deprecated. Use --entropy-sensitivity instead.",
DeprecationWarning,
)
b64_entropy_score = self.global_options.b64_entropy_score
return self.global_options.b64_entropy_score

return b64_entropy_score

@property
def b64_entropy_limit(self) -> float:
"""Returns low limit for suspicious base64 encodings"""

return self.compute_b64_entropy_limit()
# Each 4-character base64 group represents 3 8-bit bytes, i.e. an effective
# bit rate of 24/4 = 6 bits per character. We want to scale the base score
# by this amount to account for the efficiency of the string representation
# we're examining.
return self.compute_scaled_entropy_limit(6.0)

@property
def completed(self) -> bool:
Expand Down
16 changes: 12 additions & 4 deletions tests/test_base_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,16 +578,24 @@ def test_sensitivity_low_end_calculation(self):
test_scanner = TestScanner(self.options)

# 0% sensitivity means required entropy rate will be zero
self.assertEqual(test_scanner.b64_entropy_limit, 6.0)
self.assertEqual(test_scanner.hex_entropy_limit, 4.0)
self.assertEqual(test_scanner.b64_entropy_limit, 0.0)
self.assertEqual(test_scanner.hex_entropy_limit, 0.0)

def test_sensitivity_high_end_calculation(self):
self.options.entropy_sensitivity = 100
test_scanner = TestScanner(self.options)

# 100% sensitivity means entropy rate must equal bit rate
self.assertEqual(test_scanner.b64_entropy_limit, 0.0)
self.assertEqual(test_scanner.hex_entropy_limit, 0.0)
self.assertEqual(test_scanner.b64_entropy_limit, 6.0)
self.assertEqual(test_scanner.hex_entropy_limit, 4.0)

def test_sensitivity_deprecated_overrides(self):
# When the deprecated per-encoding scores are set, each entropy limit
# should be the supplied score itself rather than a value derived from
# entropy_sensitivity.
self.options.b64_entropy_score = 11.1
self.options.hex_entropy_score = 22.2
test_scanner = TestScanner(self.options)

# 11.1 and 22.2 exceed the largest limits the sensitivity scaling can
# produce (6.0 for base64, 4.0 for hex), so equality here can only come
# from the override path.
self.assertEqual(test_scanner.b64_entropy_limit, 11.1)
self.assertEqual(test_scanner.hex_entropy_limit, 22.2)


if __name__ == "__main__":
Expand Down

0 comments on commit 61497ac

Please sign in to comment.