Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: ignore unsupported YAML files #6

Merged
merged 3 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ $ shellcheck-gha --help

```
$ shellcheck-gha --help
usage: shellcheck-gha [-h] [--default-shell DEFAULT_SHELL] [--verbose] [--debug] directory
usage: shellcheck-gha [-h] [--default-shell DEFAULT_SHELL] [--verbose] [--debug] [--skip-unknown-files | --no-skip-unknown-files] [directory]

positional arguments:
directory
Expand All @@ -89,6 +89,9 @@ options:
The default shell running in the workflow(s)
--verbose Show more details about the execution.
--debug Add debug information (takes precedence over --verbose).
--skip-unknown-files, --no-skip-unknown-files
Whether to exit with an error on when parsing non-GitHub workflow or composite action YAML files. Skipping is useful when a directory
may be mixed with other YAML files (e.g. config files such as .github/dependabot.yaml). Unknown files are skipped by default.
```

## Example
Expand Down
13 changes: 13 additions & 0 deletions shellcheck_gha/console/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ def parse_args(args: list[str] | None) -> argparse.Namespace:
action="store_true",
help="Add debug information (takes precedence over --verbose).",
)
parser.add_argument(
"--skip-unknown-files",
action=argparse.BooleanOptionalAction,
help=(
"Whether to exit with an error on when parsing non-GitHub workflow "
"or composite action YAML files. Skipping is useful when a directory "
"may be mixed with other YAML files "
"(e.g. config files such as .github/dependabot.yaml). "
"Unknown files are skipped by default."
),
default=True,
)
return parser.parse_args(args)


Expand All @@ -53,6 +65,7 @@ def main(out=sys.stdout, args: list[str] | None = None) -> None:
num_files_scanned, num_snippets_scanned, findings = Extractor(
directory=directory,
default_shell=ns.default_shell,
raise_on_unsupported_yaml=not ns.skip_unknown_files,
).run()

print(f"=== Results: {len(findings)} file(s) have findings ===")
Expand Down
29 changes: 26 additions & 3 deletions shellcheck_gha/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@
from pathlib import Path

import yaml
from pydantic import ValidationError

from shellcheck_gha.models.github_action import GitHubYAML, ShellSnippet
from shellcheck_gha.models.github_action import (
GitHubYAML,
ShellSnippet,
UnknownYAMLFileException,
)
from shellcheck_gha.models.shell_check_json1 import ShellCheckOutput

logger = logging.getLogger(__name__)
Expand All @@ -24,6 +29,13 @@ class Finding:
@dataclasses.dataclass
class Extractor:
directory: Path

# Whether to raise an exception when the parsed YAML
# file is not a GitHub workflow or action.
# ``raise_on_invalid_yaml=False`` is useful when a folder
# may contain other files (e.g., dependabot config files).
raise_on_unsupported_yaml: bool = True

default_shell: str = DEFAULT_SHELL

def iter_workflow_paths(self):
Expand Down Expand Up @@ -93,11 +105,22 @@ def run(self) -> tuple[int, int, list[Finding]]:

logger.info("Scanning the directory: %s", self.directory)
for yaml_path in self.iter_workflow_paths():
num_files_scanned += 1
logger.info("Checking the YAML file: %s", yaml_path)

with yaml_path.open() as fp:
parsed_yaml = GitHubYAML.model_validate(yaml.safe_load(fp))
try:
parsed_yaml = GitHubYAML.model_validate(yaml.safe_load(fp))
except ValidationError as exc:
# Skip the file if we are not in a strict mode.
if self.raise_on_unsupported_yaml is False:
logger.warning(
"Skipping unsupported YAML file (%s): %s", yaml_path, exc
)
continue
raise UnknownYAMLFileException(exc) from exc

# Only increment if the file is successfully parsed.
num_files_scanned += 1

for snippet in parsed_yaml.iter_shell_scripts(yaml_path):
num_snippets_scanned += 1
Expand Down
23 changes: 14 additions & 9 deletions shellcheck_gha/models/github_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path
from typing import Optional, Generator

from pydantic import BaseModel, Field, computed_field
from pydantic import BaseModel, Field, computed_field, model_validator, ValidationError


class UnknownYAMLFileException(Exception):
Expand All @@ -11,6 +11,12 @@ class UnknownYAMLFileException(Exception):
a GitHub Workflow, or a GitHub Composite Action.
"""

def __init__(self, cause: ValidationError):
self.cause: ValidationError = cause

def __str__(self) -> str:
return str(self.cause)


@dataclasses.dataclass
class ShellSnippet:
Expand Down Expand Up @@ -62,20 +68,19 @@ class GitHubYAML(BaseModel):
jobs: Optional[dict[str, GitHubSteps]] = None
runs: Optional[GitHubSteps] = None

@model_validator(mode="after")
def check_contains_one_of_required_fields(self) -> "GitHubYAML":
if self.jobs or self.runs:
return self
raise AssertionError("The YAML file should contain either 'jobs' or 'runs'.")

def iter_shell_scripts(
self, yaml_path: Path
) -> Generator[ShellSnippet, None, None]:
if self.jobs:
for job in self.jobs.values():
for snippet in job.iter_shell_scripts(yaml_path):
yield snippet
elif self.runs:
if self.runs:
for snippet in self.runs.iter_shell_scripts(yaml_path):
yield snippet
else:
# This code should never be reached unless a non-GitHub workflow or action
# YAML file was provided to the CLI.
raise UnknownYAMLFileException(
"The YAML file should contain either 'jobs' or 'runs'.",
yaml_path,
)
Empty file added tests/extractor/__init__.py
Empty file.
File renamed without changes.
65 changes: 65 additions & 0 deletions tests/extractor/test_skip_invalid_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import pytest
import yaml.parser

from shellcheck_gha.extractor import Extractor
from shellcheck_gha.models.github_action import UnknownYAMLFileException


def test_skips_yaml_unsupported_files(tmp_path):
"""
When an unknown YAML file (non-GitHub workflow and non-composite action YAML)
is passed and ``raise_on_unsupported_yaml`` is set to ``False``
it shouldn't raise an exception, instead it should skip the file
and proceed to the next YAML files.
"""

invalid_yaml_file = tmp_path / "invalid.yaml"
invalid_yaml_file.write_text("{}")

valid_yaml = tmp_path / "valid.yaml"
valid_yaml.write_text(
"""\
runs:
using: "composite"
steps:
- run: echo $BAD_COMPOSITE_ACTION
shell: bash
- run: echo "$GOOD_COMPOSITE_ACTION"
shell: bash
"""
)

extractor = Extractor(
directory=tmp_path,
raise_on_unsupported_yaml=True,
)

# Should raise when ``raise_on_unsupported_yaml=True``.
with pytest.raises(UnknownYAMLFileException):
extractor.run()

# Shouldn't raise when ``raise_on_unsupported_yaml=False``.
extractor.raise_on_unsupported_yaml = False
scanned_file_count, scanned_snippet_count, findings = extractor.run()
assert scanned_file_count == 1, "shouldn't count invalid.yaml"
assert scanned_snippet_count == 2
assert len(findings) == 1


@pytest.mark.parametrize("raise_on_unsupported_yaml", [True, False])
def test_raises_on_invalid_yaml_syntax(tmp_path, raise_on_unsupported_yaml):
"""
When an invalid YAML file syntax is passed, it should raise an exception
even if ``raise_on_unsupported_yaml=False``.
"""

invalid_yaml_file = tmp_path / "invalid.yaml"
invalid_yaml_file.write_text("- a\nb: c")

extractor = Extractor(
directory=tmp_path,
raise_on_unsupported_yaml=raise_on_unsupported_yaml,
)

with pytest.raises(yaml.parser.ParserError):
extractor.run()
26 changes: 11 additions & 15 deletions tests/models/test_github_action.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
from pathlib import Path

import pytest
from pydantic import ValidationError

from shellcheck_gha.models.github_action import GitHubYAML, UnknownYAMLFileException
from shellcheck_gha.models.github_action import GitHubYAML


def test_iter_shell_scripts_raises_on_unknown_file_type():
def test_deserialization_raises_on_unknown_file_type():
"""
Ensure when a non-GitHub Composite Action, and a non-GitHub Workflow YAML file
was parsed, it raises an exception when trying to iterate through the
shell scripts.
was parsed, it raises an exception when deserializing the input.
"""

parsed = GitHubYAML.model_validate({"foo": "bar"})
yaml_path = Path("dummy")

with pytest.raises(UnknownYAMLFileException) as exc:
list(parsed.iter_shell_scripts(yaml_path))

assert exc.value.args == (
"The YAML file should contain either 'jobs' or 'runs'.",
yaml_path,
with pytest.raises(ValidationError) as exc:
GitHubYAML.model_validate({"foo": "bar"})
errors = exc.value.errors()
assert len(errors) == 1
assert (
errors[0]["msg"]
== "Assertion failed, The YAML file should contain either 'jobs' or 'runs'."
)