Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating --include-path-patterns and --exclude-path-patterns to use a table of tables #289

Merged
merged 14 commits into from
Dec 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Features:
that was still stored in an external file.
* [#202](https://github.com/godaddy/tartufo/issues/202) - Supports new format of exclusions in config file
with the ability to specify the reason along with exclusion
* [#257](https://github.com/godaddy/tartufo/issues/257) - Supports new format of include-path-patterns and
exclude-path-patterns in config file with the ability to specify the reason along with the path-patterns.

v3.0.0-alpha.1 - 11 November 2021
---------------------------------
Expand Down
14 changes: 9 additions & 5 deletions tartufo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,23 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
"-ip",
"--include-path-patterns",
multiple=True,
hidden=True,
help="""Specify a regular expression which matches Git object paths to
include in the scan. This option can be specified multiple times to include
multiple patterns. If not provided (default), all Git object paths are
included unless otherwise excluded via the --exclude-path-patterns
include in the scan. Multiple patterns can be included in the config file using
include-path-patterns = [{path-pattern="pattern", reason="reason to include pattern"},].
If not provided (default), all Git object paths
are included unless otherwise excluded via the --exclude-path-patterns
rbailey-godaddy marked this conversation as resolved.
Show resolved Hide resolved
option.""",
)
@click.option(
"-xp",
"--exclude-path-patterns",
multiple=True,
hidden=True,
help="""Specify a regular expression which matches Git object paths to
exclude from the scan. This option can be specified multiple times to
exclude multiple patterns. If not provided (default), no Git object paths
exclude from the scan. Multiple patterns can be excluded in the config file using
exclude-path-patterns = [{path-pattern="pattern", reason="reason to exclude pattern"},].
If not provided (default), no Git object paths
are excluded unless effectively excluded via the --include-path-patterns
option.""",
)
Expand Down
118 changes: 66 additions & 52 deletions tartufo/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,20 +240,46 @@ def issues(self) -> List[Issue]:
return self._issues

@property
def included_paths(self) -> List[Pattern]:
"""Get a list of regexes used as an exclusive list of paths to scan.
def config_data(self):
return self._config_data

:rtype: List[Pattern]
"""
@config_data.setter
def config_data(self, data: MutableMapping[str, Any]) -> None:
self._config_data = data

@property
def included_paths(self) -> List[Pattern]:
"""Get a list of regexes used as an exclusive list of paths to scan"""
if self._included_paths is None:
self.logger.info("Initializing included paths")
patterns = list(self.global_options.include_path_patterns or ())
self._included_paths = (
config.compile_path_rules(set(patterns)) if patterns else []
)
self.logger.debug(
"Included paths was initialized as: %s", self._included_paths
)
patterns: Set[str] = set()
deprecated = False
for pattern in tuple(
self.global_options.include_path_patterns or []
) + tuple(self.config_data.get("include_path_patterns", [])):
if isinstance(pattern, dict):
try:
patterns.add(pattern["path-pattern"])
except KeyError as exc:
raise types.ConfigException(
"Required key path-pattern missing in include-path-patterns"
) from exc
elif isinstance(pattern, str):
deprecated = True
patterns.add(pattern)
else:
raise types.ConfigException(
f"{type(pattern).__name__} pattern is illegal in include-path-patterns"
)
if deprecated:
warnings.warn(
"Old format of --include-path-patterns option and config file setup include-path-patterns "
"= ['inclusion pattern'] has been deprecated and will be removed in a future version. "
"Make sure all the inclusions are set up using new pattern i.e. include-path-patterns = "
"[{path-pattern='inclusion pattern',reason='reason for inclusion'}] in the config file",
DeprecationWarning,
)
self._included_paths = config.compile_path_rules(patterns)
return self._included_paths

@property
Expand All @@ -273,19 +299,37 @@ def excluded_entropy(self) -> List[Rule]:

@property
def excluded_paths(self) -> List[Pattern]:
"""Get a list of regexes used to match paths to exclude from the scan.

:rtype: List[Pattern]
"""
"""Get a list of regexes used to match paths to exclude from the scan"""
if self._excluded_paths is None:
self.logger.info("Initializing excluded paths")
patterns = list(self.global_options.exclude_path_patterns or ())
self._excluded_paths = (
config.compile_path_rules(set(patterns)) if patterns else []
)
self.logger.debug(
"Excluded paths was initialized as: %s", self._excluded_paths
)
patterns: Set[str] = set()
deprecated = False
for pattern in tuple(
self.global_options.exclude_path_patterns or []
) + tuple(self.config_data.get("exclude_path_patterns", [])):
if isinstance(pattern, dict):
try:
patterns.add(pattern["path-pattern"])
except KeyError as exc:
raise types.ConfigException(
"Required key path-pattern missing in exclude-path-patterns"
) from exc
elif isinstance(pattern, str):
deprecated = True
patterns.add(pattern)
else:
raise types.ConfigException(
f"{type(pattern).__name__} pattern is illegal in exclude-path-patterns"
)
if deprecated:
warnings.warn(
"Old format of --exclude-path-patterns option and config file setup exclude-path-patterns "
"= ['exclusion pattern'] has been deprecated and will be removed in a future version. "
"Make sure all the exclusions are set up using new pattern i.e. exclude-path-patterns = "
"[{path-pattern='exclusion pattern',reason='reason for exclusion'}] in the config file",
DeprecationWarning,
)
self._excluded_paths = config.compile_path_rules(patterns)
return self._excluded_paths

@property
Expand Down Expand Up @@ -339,14 +383,6 @@ def should_scan(self, file_path: str):
return False
return True

@property
def config_data(self):
return self._config_data

@config_data.setter
def config_data(self, data: MutableMapping[str, Any]) -> None:
self._config_data = data

@cached_property
def excluded_signatures(self) -> Tuple[str, ...]:
if self._excluded_signatures is None:
Expand Down Expand Up @@ -675,7 +711,6 @@ def load_repo(self, repo_path: str) -> pygit2.Repository:


class GitRepoScanner(GitScanner):

git_options: types.GitOptions

def __init__(
Expand Down Expand Up @@ -704,27 +739,6 @@ def load_repo(self, repo_path: str) -> pygit2.Repository:
config_file = None
if config_file and config_file != self.global_options.config:
self.config_data = data
include_patterns = list(data.get("include_path_patterns", ()))
repo_include_file = data.get("include_paths", None)
if repo_include_file:
repo_include_file = pathlib.Path(repo_path, repo_include_file)
if repo_include_file.exists():
with repo_include_file.open() as handle:
include_patterns += handle.readlines()
if include_patterns:
include_patterns = config.compile_path_rules(include_patterns)
self._included_paths = list(set(self.included_paths + include_patterns))

exclude_patterns = list(data.get("exclude_path_patterns", ()))
repo_exclude_file = data.get("exclude_paths", None)
if repo_exclude_file:
repo_exclude_file = pathlib.Path(repo_path, repo_exclude_file)
if repo_exclude_file.exists():
with repo_exclude_file.open() as handle:
exclude_patterns += handle.readlines()
if exclude_patterns:
exclude_patterns = config.compile_path_rules(exclude_patterns)
self._excluded_paths = list(set(self.excluded_paths + exclude_patterns))
rbailey-godaddy marked this conversation as resolved.
Show resolved Hide resolved
try:
repo = pygit2.Repository(repo_path)
if not self.git_options.include_submodules:
Expand Down
4 changes: 2 additions & 2 deletions tartufo/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class GlobalOptions:
entropy: bool
regex: bool
scan_filenames: bool
include_path_patterns: Tuple[str, ...]
exclude_path_patterns: Tuple[str, ...]
include_path_patterns: Union[Tuple[str, ...], Tuple[Dict[str, str], ...]]
exclude_path_patterns: Union[Tuple[str, ...], Tuple[Dict[str, str], ...]]
exclude_entropy_patterns: Tuple[Dict[str, str], ...]
exclude_signatures: Union[Tuple[Dict[str, str], ...], Tuple[str, ...]]
output_dir: Optional[str]
Expand Down
67 changes: 65 additions & 2 deletions tests/test_git_repo_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ def test_load_repo_does_not_filter_submodules_when_requested(
def test_extra_inclusions_get_added(self, mock_load: mock.MagicMock):
mock_load.return_value = (
self.data_dir / "pyproject.toml",
{"include_paths": "include-files", "include_path_patterns": ("foo/",)},
{"include_path_patterns": ("tartufo/", "scripts/")},
)
self.global_options.include_path_patterns = ("foo/",)
test_scanner = scanner.GitRepoScanner(
self.global_options, self.git_options, str(self.data_dir)
)
Expand All @@ -77,8 +78,9 @@ def test_extra_inclusions_get_added(self, mock_load: mock.MagicMock):
def test_extra_exclusions_get_added(self, mock_load: mock.MagicMock):
mock_load.return_value = (
self.data_dir / "pyproject.toml",
{"exclude_paths": "exclude-files", "exclude_path_patterns": ("bar/",)},
{"exclude_path_patterns": ("tests/", r"\.venv/", r".*\.egg-info/")},
)
self.global_options.exclude_path_patterns = ("bar/",)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as the test above.

test_scanner = scanner.GitRepoScanner(
self.global_options, self.git_options, str(self.data_dir)
)
Expand Down Expand Up @@ -476,5 +478,66 @@ def test_error_is_not_raised_when_two_styles_signatures_are_configured(self):
self.assertCountEqual(test_scanner.excluded_signatures, ("foo/", "bar/"))


class IncludedPathsTests(ScannerTestCase):
    """Check how ``include_path_patterns`` entries surface as ``included_paths``."""

    def _included_paths_for(self, configured):
        """Store ``configured`` on the global options and return the scanner's included paths."""
        self.global_options.include_path_patterns = configured
        repo_scanner = scanner.GitRepoScanner(
            self.global_options, self.git_options, "."
        )
        return repo_scanner.included_paths

    def test_old_style_included_paths_are_processed(self):
        """A plain string entry yields the matching compiled regex."""
        compiled = self._included_paths_for(["bar/"])
        self.assertEqual(compiled, [re.compile("bar/")])

    def test_new_style_included_paths_are_processed(self):
        """A table entry contributes the regex stored under ``path-pattern``."""
        table_entry = {"path-pattern": "bar/", "reason": "path pattern"}
        compiled = self._included_paths_for([table_entry])
        self.assertEqual(compiled, [re.compile("bar/")])

    def test_error_is_not_raised_when_two_styles_included_paths_are_configured(self):
        """String and table entries may be mixed; both end up in the result."""
        mixed_entries = [
            "foo/",
            {"path-pattern": "bar/", "reason": "path pattern"},
        ]
        compiled = self._included_paths_for(mixed_entries)
        expected = [re.compile("foo/"), re.compile("bar/")]
        # Order is not significant, hence assertCountEqual rather than assertEqual.
        self.assertCountEqual(compiled, expected)


class ExcludedPathsTests(ScannerTestCase):
    """Check how ``exclude_path_patterns`` entries surface as ``excluded_paths``."""

    def _excluded_paths_for(self, configured):
        """Store ``configured`` on the global options and return the scanner's excluded paths."""
        self.global_options.exclude_path_patterns = configured
        repo_scanner = scanner.GitRepoScanner(
            self.global_options, self.git_options, "."
        )
        return repo_scanner.excluded_paths

    def test_old_style_excluded_paths_are_processed(self):
        """A plain string entry yields the matching compiled regex."""
        compiled = self._excluded_paths_for(["bar/"])
        self.assertEqual(compiled, [re.compile("bar/")])

    def test_new_style_excluded_paths_are_processed(self):
        """A table entry contributes the regex stored under ``path-pattern``."""
        table_entry = {"path-pattern": "bar/", "reason": "path pattern"}
        compiled = self._excluded_paths_for([table_entry])
        self.assertEqual(compiled, [re.compile("bar/")])

    @mock.patch("tartufo.scanner.GitScanner.filter_submodules", mock.MagicMock())
    def test_error_is_not_raised_when_two_styles_excluded_paths_are_configured(self):
        """String and table entries may be mixed; both end up in the result."""
        mixed_entries = [
            "foo/",
            {"path-pattern": "bar/", "reason": "path pattern"},
        ]
        compiled = self._excluded_paths_for(mixed_entries)
        expected = [re.compile("foo/"), re.compile("bar/")]
        # Order is not significant, hence assertCountEqual rather than assertEqual.
        self.assertCountEqual(compiled, expected)


# Hand control to unittest's command-line runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()