From 69fd57e2bcf2f26d84d2847c7c62162f664d51aa Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 18 Nov 2024 14:07:04 -0600 Subject: [PATCH] Move `check_sort_dbt_yaml_files` linter to pre-commit (#644) * Move `check_sort_dbt_yaml_files` linter to pre-commit * Downgrade pre-commit hooks since they're causing new errors * Update check_sort_dbt_yaml_files.py to accept a list of files for pre-commit * Add type annotations to unsorted_*_files to appease mypy --- .../workflows/check_sort_dbt_yaml_files.yaml | 39 ------ .pre-commit-config.yaml | 9 ++ dbt/scripts/check_sort_dbt_yaml_files.py | 116 ++++++++++++------ 3 files changed, 86 insertions(+), 78 deletions(-) delete mode 100644 .github/workflows/check_sort_dbt_yaml_files.yaml diff --git a/.github/workflows/check_sort_dbt_yaml_files.yaml b/.github/workflows/check_sort_dbt_yaml_files.yaml deleted file mode 100644 index da7bea44a..000000000 --- a/.github/workflows/check_sort_dbt_yaml_files.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: check-sort-dbt-yaml-files - -on: - pull_request: - branches: [master] - push: - branches: [master] - -env: - UV_SYSTEM_PYTHON: 1 - -jobs: - check-sort-dbt-yaml-files: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - enable-cache: true - cache-dependency-glob: | - dbt/pyproject.toml - dbt/uv.lock - - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version-file: dbt/.python-version - - - name: Install Python dependencies - working-directory: dbt - shell: bash - run: uv pip install ".[ci_checks]" - - - name: Run dbt yaml sort checks - working-directory: dbt - run: python scripts/check_sort_dbt_yaml_files.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 650e7a884..1e9dca2ec 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,3 +30,12 @@ repos: - --fix # Formatter - id: ruff-format + - repo: local + hooks: + - id: check-sort-dbt-yaml-files + name: check-sort-dbt-yaml-files + entry: python3 dbt/scripts/check_sort_dbt_yaml_files.py + language: system + types_or: [yaml, markdown] + files: ^dbt/ + exclude: venv diff --git a/dbt/scripts/check_sort_dbt_yaml_files.py b/dbt/scripts/check_sort_dbt_yaml_files.py index 70616fa77..9d4278b09 100644 --- a/dbt/scripts/check_sort_dbt_yaml_files.py +++ b/dbt/scripts/check_sort_dbt_yaml_files.py @@ -1,5 +1,6 @@ import os import re +import sys from collections import defaultdict import yaml @@ -340,42 +341,68 @@ def check_all_files(directory): Returns: tuple: Results of unsorted files and errors for different checks. """ - unsorted_columns_files = defaultdict(int) - unsorted_data_tests_files = defaultdict(int) - error_files = [] - unsorted_md_files = [] - unsorted_columns_md_files = [] - unsorted_shared_columns_md_files = [] + file_paths_to_check = [] for root, _, files in os.walk(directory): if "venv" in root: continue for file in files: - file_path = os.path.join(root, file) - if file.endswith(".yaml") or file.endswith(".yml"): - unsorted_columns, errors = check_columns(file_path) - for key, value in unsorted_columns.items(): - unsorted_columns_files[key] += value - unsorted_data_tests, errors = check_data_tests(file_path) - for key, value in unsorted_data_tests.items(): - unsorted_data_tests_files[key] += value - if errors: - error_files.extend(errors) - elif file == "docs.md": - unsorted_md = check_md_file(file_path) - if unsorted_md: - unsorted_md_files.append(unsorted_md) - elif file == "columns.md": - unsorted_columns_md = check_columns_md_file(file_path) - if unsorted_columns_md: - unsorted_columns_md_files.append(unsorted_columns_md) - elif file == "shared_columns.md": - unsorted_shared_columns_md = check_shared_columns_md_file( - file_path + if ( + file.endswith(".yaml") + or file.endswith(".yml") + or (file in ("docs.md", "columns.md", "shared_columns.md")) + ): + file_paths_to_check.append(os.path.join(root, file)) + + return check_files(file_paths_to_check) + + +def check_files(file_paths: list[str]): + """ + Check all files in a list of filepaths for sorted YAML keys and markdown + headings. + + Args: + file_paths (list[str]): The list of files to check + + Returns: + tuple: Results of unsorted files and errors for different checks. + """ + unsorted_columns_files: dict[str, int] = defaultdict(int) + unsorted_data_tests_files: dict[str, int] = defaultdict(int) + error_files = [] + unsorted_md_files = [] + unsorted_columns_md_files = [] + unsorted_shared_columns_md_files = [] + for file_path in file_paths: + if not os.path.isfile(file_path): + raise ValueError( + f"check_files got a filepath that doesn't exist: {file_path}" + ) + if file_path.endswith(".yaml") or file_path.endswith(".yml"): + unsorted_columns, errors = check_columns(file_path) + for key, value in unsorted_columns.items(): + unsorted_columns_files[key] += value + unsorted_data_tests, errors = check_data_tests(file_path) + for key, value in unsorted_data_tests.items(): + unsorted_data_tests_files[key] += value + if errors: + error_files.extend(errors) + elif os.path.basename(file_path) == "docs.md": + unsorted_md = check_md_file(file_path) + if unsorted_md: + unsorted_md_files.append(unsorted_md) + elif os.path.basename(file_path) == "columns.md": + unsorted_columns_md = check_columns_md_file(file_path) + if unsorted_columns_md: + unsorted_columns_md_files.append(unsorted_columns_md) + elif os.path.basename(file_path) == "shared_columns.md": + unsorted_shared_columns_md = check_shared_columns_md_file( + file_path + ) + if unsorted_shared_columns_md: + unsorted_shared_columns_md_files.append( + unsorted_shared_columns_md ) - if unsorted_shared_columns_md: - unsorted_shared_columns_md_files.append( - unsorted_shared_columns_md - ) return ( unsorted_columns_files, @@ -388,14 +415,25 @@ def check_all_files(directory): if __name__ == "__main__": - ( - unsorted_columns_files, - unsorted_data_tests_files, - error_files, - unsorted_md_files, - unsorted_columns_md_files, - unsorted_shared_columns_md_files, - ) = check_all_files(os.getcwd()) + args = sys.argv[1:] + if args: + ( + unsorted_columns_files, + unsorted_data_tests_files, + error_files, + unsorted_md_files, + unsorted_columns_md_files, + unsorted_shared_columns_md_files, + ) = check_files(args) + else: + ( + unsorted_columns_files, + unsorted_data_tests_files, + error_files, + unsorted_md_files, + unsorted_columns_md_files, + unsorted_shared_columns_md_files, + ) = check_all_files(os.getcwd()) if unsorted_columns_files: print("The following files have unsorted columns:")