From 1d3d3ca0aafb0494216b22b76b3b06a9422b6bbe Mon Sep 17 00:00:00 2001 From: Mikail Kocak Date: Tue, 16 Apr 2024 15:08:38 +0200 Subject: [PATCH 1/3] feature: ignore unsupported YAML files This adds logic to ignore unsupported YAML files (non-GitHub Workflows, and non-GitHub Composite Actions YAML files). This new logic is also enabled by default. It makes it easier to work with `shellcheck-gha` as `.github/` directory may contains multiple kinds of YAML files such as `dependabot.yaml`. Before this change it would require to make sure there are no unrelated YAML files in the `.github/` directory which is inconvenient and makes the tool hard to use. --- shellcheck_gha/console/app.py | 12 ++++ shellcheck_gha/extractor.py | 29 ++++++++- shellcheck_gha/models/github_action.py | 23 ++++--- tests/extractor/__init__.py | 0 tests/{ => extractor}/test_iter_workflows.py | 0 tests/extractor/test_skip_invalid_yaml.py | 66 ++++++++++++++++++++ tests/models/test_github_action.py | 26 ++++---- 7 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 tests/extractor/__init__.py rename tests/{ => extractor}/test_iter_workflows.py (100%) create mode 100644 tests/extractor/test_skip_invalid_yaml.py diff --git a/shellcheck_gha/console/app.py b/shellcheck_gha/console/app.py index 07606e8..5f32ed4 100644 --- a/shellcheck_gha/console/app.py +++ b/shellcheck_gha/console/app.py @@ -28,6 +28,17 @@ def parse_args(args: list[str] | None) -> argparse.Namespace: action="store_true", help="Add debug information (takes precedence over --verbose).", ) + parser.add_argument( + "--skip-unknown-files", + action=argparse.BooleanOptionalAction, + help=( + "Whether to exit with an error on when parsing non-GitHub workflow " + "or composite action YAML files. Skipping is useful when a directory " + "may be mixed with other YAML files " + "(e.g. config files such as .github/dependabot.yaml)." + ), + default=True, + ) return parser.parse_args(args) @@ -53,6 +64,7 @@ def main(out=sys.stdout, args: list[str] | None = None) -> None: num_files_scanned, num_snippets_scanned, findings = Extractor( directory=directory, default_shell=ns.default_shell, + raise_on_unsupported_yaml=not ns.skip_unknown_files, ).run() print(f"=== Results: {len(findings)} file(s) have findings ===") diff --git a/shellcheck_gha/extractor.py b/shellcheck_gha/extractor.py index dc48a79..c020413 100644 --- a/shellcheck_gha/extractor.py +++ b/shellcheck_gha/extractor.py @@ -5,8 +5,13 @@ from pathlib import Path import yaml +from pydantic import ValidationError -from shellcheck_gha.models.github_action import GitHubYAML, ShellSnippet +from shellcheck_gha.models.github_action import ( + GitHubYAML, + ShellSnippet, + UnknownYAMLFileException, +) from shellcheck_gha.models.shell_check_json1 import ShellCheckOutput logger = logging.getLogger(__name__) @@ -24,6 +29,13 @@ class Finding: @dataclasses.dataclass class Extractor: directory: Path + + # Whether to raise an exception when the parsed YAML + # file is not a GitHub workflow or action. + # ``raise_on_invalid_yaml=False`` is useful when a folder + # may contain other files (e.g., dependabot config files). + raise_on_unsupported_yaml: bool = True + default_shell: str = DEFAULT_SHELL def iter_workflow_paths(self): @@ -93,11 +105,22 @@ def run(self) -> tuple[int, int, list[Finding]]: logger.info("Scanning the directory: %s", self.directory) for yaml_path in self.iter_workflow_paths(): - num_files_scanned += 1 logger.info("Checking the YAML file: %s", yaml_path) with yaml_path.open() as fp: - parsed_yaml = GitHubYAML.model_validate(yaml.safe_load(fp)) + try: + parsed_yaml = GitHubYAML.model_validate(yaml.safe_load(fp)) + except ValidationError as exc: + # Skip the file if we are not in a strict mode. + if self.raise_on_unsupported_yaml is False: + logger.warning( + "Skipping invalid YAML file (%s): %s", yaml_path, exc + ) + continue + raise UnknownYAMLFileException(exc) from exc + + # Only increment if the file is successfully parsed. + num_files_scanned += 1 for snippet in parsed_yaml.iter_shell_scripts(yaml_path): num_snippets_scanned += 1 diff --git a/shellcheck_gha/models/github_action.py b/shellcheck_gha/models/github_action.py index ba3a307..7b384ec 100644 --- a/shellcheck_gha/models/github_action.py +++ b/shellcheck_gha/models/github_action.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import Optional, Generator -from pydantic import BaseModel, Field, computed_field +from pydantic import BaseModel, Field, computed_field, model_validator, ValidationError class UnknownYAMLFileException(Exception): @@ -11,6 +11,12 @@ class UnknownYAMLFileException(Exception): a GitHub Workflow, or a GitHub Composite Action. """ + def __init__(self, cause: ValidationError): + self.cause: ValidationError = cause + + def __str__(self) -> str: + return str(self.cause) + @dataclasses.dataclass class ShellSnippet: @@ -62,6 +68,12 @@ class GitHubYAML(BaseModel): jobs: Optional[dict[str, GitHubSteps]] = None runs: Optional[GitHubSteps] = None + @model_validator(mode="after") + def check_contains_one_of_required_fields(self) -> "GitHubYAML": + if self.jobs or self.runs: + return self + raise AssertionError("The YAML file should contain either 'jobs' or 'runs'.") + def iter_shell_scripts( self, yaml_path: Path ) -> Generator[ShellSnippet, None, None]: @@ -69,13 +81,6 @@ def iter_shell_scripts( for job in self.jobs.values(): for snippet in job.iter_shell_scripts(yaml_path): yield snippet - elif self.runs: + if self.runs: for snippet in self.runs.iter_shell_scripts(yaml_path): yield snippet - else: - # This code should never be reached unless a non-GitHub workflow or action - # YAML file was provided to the CLI. - raise UnknownYAMLFileException( - "The YAML file should contain either 'jobs' or 'runs'.", - yaml_path, - ) diff --git a/tests/extractor/__init__.py b/tests/extractor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_iter_workflows.py b/tests/extractor/test_iter_workflows.py similarity index 100% rename from tests/test_iter_workflows.py rename to tests/extractor/test_iter_workflows.py diff --git a/tests/extractor/test_skip_invalid_yaml.py b/tests/extractor/test_skip_invalid_yaml.py new file mode 100644 index 0000000..7d13d34 --- /dev/null +++ b/tests/extractor/test_skip_invalid_yaml.py @@ -0,0 +1,66 @@ +# TODO: add test case that ensures it raises an exception if a YAML file is not YAML. +import pytest +import yaml.parser + +from shellcheck_gha.extractor import Extractor +from shellcheck_gha.models.github_action import UnknownYAMLFileException + + +def test_skips_yaml_unsupported_files(tmp_path): + """ + When an unknown YAML file (non-GitHub workflow and non-composite action YAML) + is passed and ``raise_on_unsupported_yaml`` is set to ``False`` + it shouldn't raise an exception, instead it should skip the file + and proceed to the next YAML files. + """ + + invalid_yaml_file = tmp_path / "invalid.yaml" + invalid_yaml_file.write_text("{}") + + valid_yaml = tmp_path / "valid.yaml" + valid_yaml.write_text( + """\ +runs: + using: "composite" + steps: + - run: echo $BAD_COMPOSITE_ACTION + shell: bash + - run: echo "$GOOD_COMPOSITE_ACTION" + shell: bash +""" + ) + + extractor = Extractor( + directory=tmp_path, + raise_on_unsupported_yaml=True, + ) + + # Should raise when ``raise_on_unsupported_yaml=True``. + with pytest.raises(UnknownYAMLFileException): + extractor.run() + + # Shouldn't raise when ``raise_on_unsupported_yaml=False``. + extractor.raise_on_unsupported_yaml = False + scanned_file_count, scanned_snippet_count, findings = extractor.run() + assert scanned_file_count == 1, "shouldn't count invalid.yaml" + assert scanned_snippet_count == 2 + assert len(findings) == 1 + + +@pytest.mark.parametrize("raise_on_unsupported_yaml", [True, False]) +def test_raises_on_invalid_yaml_syntax(tmp_path, raise_on_unsupported_yaml): + """ + When an invalid YAML file syntax is passed, it should raise an exception + even if ``raise_on_unsupported_yaml=False``. + """ + + invalid_yaml_file = tmp_path / "invalid.yaml" + invalid_yaml_file.write_text("- a\nb: c") + + extractor = Extractor( + directory=tmp_path, + raise_on_unsupported_yaml=raise_on_unsupported_yaml, + ) + + with pytest.raises(yaml.parser.ParserError): + extractor.run() diff --git a/tests/models/test_github_action.py b/tests/models/test_github_action.py index fdb8ad4..3719f2a 100644 --- a/tests/models/test_github_action.py +++ b/tests/models/test_github_action.py @@ -1,24 +1,20 @@ -from pathlib import Path - import pytest +from pydantic import ValidationError -from shellcheck_gha.models.github_action import GitHubYAML, UnknownYAMLFileException +from shellcheck_gha.models.github_action import GitHubYAML -def test_iter_shell_scripts_raises_on_unknown_file_type(): +def test_deserialization_raises_on_unknown_file_type(): """ Ensure when a non-GitHub Composite Action, and a non-GitHub Workflow YAML file - was parsed, it raises an exception when trying to iterate through the - shell scripts. + was parsed, it raises an exception when deserializing the input. """ - parsed = GitHubYAML.model_validate({"foo": "bar"}) - yaml_path = Path("dummy") - - with pytest.raises(UnknownYAMLFileException) as exc: - list(parsed.iter_shell_scripts(yaml_path)) - - assert exc.value.args == ( - "The YAML file should contain either 'jobs' or 'runs'.", - yaml_path, + with pytest.raises(ValidationError) as exc: + GitHubYAML.model_validate({"foo": "bar"}) + errors = exc.value.errors() + assert len(errors) == 1 + assert ( + errors[0]["msg"] + == "Assertion failed, The YAML file should contain either 'jobs' or 'runs'." ) From f660d140b493fb9b90395866f72123eb2a732e86 Mon Sep 17 00:00:00 2001 From: Mikail Kocak Date: Tue, 16 Apr 2024 15:18:14 +0200 Subject: [PATCH 2/3] Update README.md CLI usage --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b88c362..72e7585 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ $ shellcheck-gha --help ``` $ shellcheck-gha --help -usage: shellcheck-gha [-h] [--default-shell DEFAULT_SHELL] [--verbose] [--debug] directory +usage: shellcheck-gha [-h] [--default-shell DEFAULT_SHELL] [--verbose] [--debug] [--skip-unknown-files | --no-skip-unknown-files] [directory] positional arguments: directory @@ -89,6 +89,9 @@ options: The default shell running in the workflow(s) --verbose Show more details about the execution. --debug Add debug information (takes precedence over --verbose). + --skip-unknown-files, --no-skip-unknown-files + Whether to exit with an error on when parsing non-GitHub workflow or composite action YAML files. Skipping is useful when a directory + may be mixed with other YAML files (e.g. config files such as .github/dependabot.yaml). ``` ## Example From beec740158c9ea98371fc02d5977ba00b02f1142 Mon Sep 17 00:00:00 2001 From: Mikail <6186720+NyanKiyoshi@users.noreply.github.com> Date: Wed, 17 Apr 2024 12:08:17 +0200 Subject: [PATCH 3/3] Apply slight rewordings and improve docs --- README.md | 2 +- shellcheck_gha/console/app.py | 3 ++- shellcheck_gha/extractor.py | 2 +- tests/extractor/test_skip_invalid_yaml.py | 1 - 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 72e7585..e216826 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ options: --debug Add debug information (takes precedence over --verbose). --skip-unknown-files, --no-skip-unknown-files Whether to exit with an error on when parsing non-GitHub workflow or composite action YAML files. Skipping is useful when a directory - may be mixed with other YAML files (e.g. config files such as .github/dependabot.yaml). + may be mixed with other YAML files (e.g. config files such as .github/dependabot.yaml). Unknown files are skipped by default. ``` ## Example diff --git a/shellcheck_gha/console/app.py b/shellcheck_gha/console/app.py index 5f32ed4..34db190 100644 --- a/shellcheck_gha/console/app.py +++ b/shellcheck_gha/console/app.py @@ -35,7 +35,8 @@ def parse_args(args: list[str] | None) -> argparse.Namespace: "Whether to exit with an error on when parsing non-GitHub workflow " "or composite action YAML files. Skipping is useful when a directory " "may be mixed with other YAML files " - "(e.g. config files such as .github/dependabot.yaml)." + "(e.g. config files such as .github/dependabot.yaml). " + "Unknown files are skipped by default." ), default=True, ) diff --git a/shellcheck_gha/extractor.py b/shellcheck_gha/extractor.py index c020413..ac2d391 100644 --- a/shellcheck_gha/extractor.py +++ b/shellcheck_gha/extractor.py @@ -114,7 +114,7 @@ def run(self) -> tuple[int, int, list[Finding]]: # Skip the file if we are not in a strict mode. if self.raise_on_unsupported_yaml is False: logger.warning( - "Skipping invalid YAML file (%s): %s", yaml_path, exc + "Skipping unsupported YAML file (%s): %s", yaml_path, exc ) continue raise UnknownYAMLFileException(exc) from exc diff --git a/tests/extractor/test_skip_invalid_yaml.py b/tests/extractor/test_skip_invalid_yaml.py index 7d13d34..74d0203 100644 --- a/tests/extractor/test_skip_invalid_yaml.py +++ b/tests/extractor/test_skip_invalid_yaml.py @@ -1,4 +1,3 @@ -# TODO: add test case that ensures it raises an exception if a YAML file is not YAML. import pytest import yaml.parser