Merge pull request #1004 from koordinates/ruff

Ruff
craigds authored Oct 10, 2024
2 parents 6383a38 + 2153d30 commit 7286ce5
Showing 35 changed files with 141 additions and 101 deletions.
1 change: 1 addition & 0 deletions .git-blame-ignore-revs
@@ -0,0 +1 @@
d3c6829ef86d6340f6e619e42cb9357192d5e881 # Ruff codebase
17 changes: 17 additions & 0 deletions .hooks/kx-ruff.py
@@ -0,0 +1,17 @@
#!/usr/bin/env python3
import subprocess
import sys

filez = sys.argv[1:]

print("Running ruff linter")
# TODO: should we enable `--unsafe-fixes` here?
# (Most of the 'unsafe' fixes appear to be "it might move a comment slightly" level of unsafe)
subprocess.check_call(["ruff", "check", "--fix", *filez])

print("Running ruff formatter")
subprocess.check_call(["ruff", "format", *filez])

# pre-commit doesn't add changed files to the index. Normally changed files fail the hook.
# however, just calling git add sneakily works around that.
subprocess.check_call(["git", "add"] + filez)
9 changes: 6 additions & 3 deletions .pre-commit-config.yaml
@@ -10,10 +10,13 @@ repos:
- id: check-yaml
- id: check-added-large-files
- id: check-json
- repo: https://github.com/psf/black
rev: 22.3.0
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.6.1
hooks:
- id: black
- id: ruff
# Run both the linter and formatter (and commit the changes automatically)
entry: ".hooks/kx-ruff.py"
- repo: https://github.com/sirosen/check-jsonschema
rev: 0.6.0
hooks:
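Overriding `entry` here replaces ruff-pre-commit's default command with the custom wrapper script, so one hook run lints, formats, and re-stages files in a single pass. Roughly speaking, pre-commit invokes the entry with the staged files matching the hook's file types as arguments; a sketch of the equivalent call (the file list is a hypothetical example):

```python
import subprocess

# Hypothetical staged files; pre-commit supplies the real list.
staged_files = ["kart/geometry.py", "kart/s3_util.py"]

# Roughly what pre-commit runs for this hook (assumes the script is
# executable, as committed above).
subprocess.check_call([".hooks/kx-ruff.py", *staged_files])
```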
9 changes: 3 additions & 6 deletions CONTRIBUTING.md
@@ -183,11 +183,8 @@ Continuous integration builds apps, tests, and installers for every commit on su

## Code formatting

We use [Black](https://github.com/psf/black) to ensure consistent code formatting. We recommend integrating black with your editor:
We use [Ruff](https://docs.astral.sh/ruff/) to ensure consistent code formatting. We recommend integrating Ruff with your editor - [see instructions here](https://docs.astral.sh/ruff/editors/setup/)

* Sublime Text: install [sublack](https://packagecontrol.io/packages/sublack) via Package Control
* VSCode [instructions](https://code.visualstudio.com/docs/python/editing#_formatting)
We use the default settings, and target python 3.10+.

We use the default settings, and target python 3.8+.

One easy solution is to install [pre-commit](https://pre-commit.com), run `pre-commit install --install-hooks` and it'll automatically validate your changes code as a git pre-commit hook.
One easy solution is to install [pre-commit](https://pre-commit.com), run `pre-commit install --install-hooks` and it'll automatically validate your changes as a git pre-commit hook.
1 change: 0 additions & 1 deletion kart/crs_util.py
@@ -80,7 +80,6 @@ def parse_name(crs):


def parse_authority(crs):

if isinstance(crs, str):
result = WKTLexer().find_pattern(
crs, AUTHORITY_PATTERN, at_depth=1, extract_strings=True
7 changes: 6 additions & 1 deletion kart/dataset_mixins.py
@@ -1,5 +1,5 @@
from collections.abc import Iterable
from typing import Callable, Optional
from typing import Callable, Optional, TYPE_CHECKING

import pygit2

@@ -352,3 +352,8 @@ def of(cls, old_path, new_path, reverse=False):
return RawDiffDelta(pygit2.GIT_DELTA_DELETED, "D", old_path, new_path)
else:
return RawDiffDelta(pygit2.GIT_DELTA_MODIFIED, "M", old_path, new_path)


if TYPE_CHECKING:
# This is here to avoid circular imports
from kart.base_dataset import BaseDataset, WorkdirDiffCache
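The `TYPE_CHECKING` block appended here is a standard way to break an import cycle: the import runs only under static type checkers, and annotations referencing those names are written as strings, so nothing extra is needed at runtime. A minimal sketch of the pattern, using a hypothetical module rather than kart's:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers (mypy, pyright); never executed at
    # runtime, so importing the other module cannot create a cycle.
    from mypackage.base import BaseThing


def describe(obj: "BaseThing") -> str:
    # Quoted annotation: BaseThing need not be importable at runtime.
    return type(obj).__name__
```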
2 changes: 0 additions & 2 deletions kart/exec.py
@@ -1,3 +1 @@



1 change: 0 additions & 1 deletion kart/fast_import.py
@@ -389,7 +389,6 @@ def _import_single_source(
# Features
t1 = time.monotonic()
if replace_ids is not None:

# As we iterate over IDs, also delete them from the dataset.
# This means we don't have to load the whole list into memory.
def _ids():
3 changes: 1 addition & 2 deletions kart/geometry.py
@@ -466,7 +466,7 @@ def hex_wkb_to_ogr(hex_wkb):
# The GPKG spec says we should use POINT(NaN, NaN) instead.
# Here's the WKB of that.
# We can't use WKT here: https://github.com/OSGeo/gdal/issues/2472
WKB_POINT_EMPTY_LE = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF8\x7F\x00\x00\x00\x00\x00\x00\xF8\x7F"
WKB_POINT_EMPTY_LE = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x7f\x00\x00\x00\x00\x00\x00\xf8\x7f"


def ogr_to_hex_wkb(ogr_geom):
@@ -520,7 +520,6 @@ def ogr_to_gpkg_geom(
)
envelope = b""
if _add_envelope_type:

if _add_envelope_type == GPKG_ENVELOPE_XY:
envelope = ogr_geom.GetEnvelope()
elif _add_envelope_type == GPKG_ENVELOPE_XYZ:
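The `WKB_POINT_EMPTY_LE` change in this file is purely cosmetic: the Ruff formatter normalises hex escapes in bytes literals to lowercase, and the byte values are unchanged, as a quick check confirms:

```python
old = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF8\x7F\x00\x00\x00\x00\x00\x00\xF8\x7F"
new = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x7f\x00\x00\x00\x00\x00\x00\xf8\x7f"
assert old == new  # only the letter case of the escapes differs
```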
4 changes: 3 additions & 1 deletion kart/html_diff_writer.py
@@ -30,7 +30,9 @@ def _check_output_path(cls, repo, output_path):
return output_path or repo.workdir_path / "DIFF.html"

def write_diff(self, diff_format=DiffFormat.FULL):
template_path = self.html_template or (Path(kart.package_data_path) / "diff-view.html")
template_path = self.html_template or (
Path(kart.package_data_path) / "diff-view.html"
)
if not os.path.exists(template_path):
raise click.UsageError("Html template not found")
if diff_format != DiffFormat.FULL:
1 change: 1 addition & 0 deletions kart/meta_items.py
@@ -1,4 +1,5 @@
from enum import Enum, auto
import binascii
import functools
import re

3 changes: 0 additions & 3 deletions kart/raster/validate_cloud_optimized_geotiff.py
@@ -65,7 +65,6 @@ def full_check_band(
block_trailer_last_4_bytes_repeated,
mask_interleaved_with_imagery,
):

block_size = band.GetBlockSize()
mask_band = None
if mask_interleaved_with_imagery:
@@ -82,7 +81,6 @@ def full_check_band(
last_offset = 0
for y in range(yblocks):
for x in range(xblocks):

offset = band.GetMetadataItem("BLOCK_OFFSET_%d_%d" % (x, y), "TIFF")
offset = int(offset) if offset is not None else 0
bytecount = band.GetMetadataItem("BLOCK_SIZE_%d_%d" % (x, y), "TIFF")
@@ -215,7 +213,6 @@ def validate(ds, check_tiled=True, full_check=False):
mask_interleaved_with_imagery = False

if ifd_offset not in (8, 16):

# Check if there is GDAL hidden structural metadata
f = gdal.VSIFOpenL(filename, "rb")
if not f:
4 changes: 2 additions & 2 deletions kart/rev_list_objects.py
@@ -105,7 +105,7 @@ def rev_list_blobs(repo, start_commits, stop_commits, pathspecs):
Yield all the blobs referenced between the start and stop commits as tuples (commit_id, path, blob).
Each blob will only be yielded once, so not necessarily at all paths and commits where it can be found.
"""
for (commit_id, path, oid) in rev_list_object_oids(
for commit_id, path, oid in rev_list_object_oids(
repo, start_commits, stop_commits, pathspecs
):
obj = repo[oid]
@@ -118,7 +118,7 @@ def rev_list_matching_blobs(repo, start_commits, stop_commits, pathspecs, path_p
Yield all the blobs with a path matching the given pattern referenced between the start and stop commits as tuples
(commit_id, match_result, blob). To get the entire path, use match_result.group(0).
"""
for (commit_id, path, oid) in rev_list_object_oids(
for commit_id, path, oid in rev_list_object_oids(
repo, start_commits, stop_commits, pathspecs
):
m = path_pattern.fullmatch(path)
9 changes: 6 additions & 3 deletions kart/s3_util.py
@@ -167,9 +167,12 @@ def fetch_multiple_from_s3(s3_urls_and_paths, quiet=False):
disable=disable,
)

with progress as p, concurrent.futures.ThreadPoolExecutor(
max_workers=_FETCH_MULTIPLE_FROM_S3_WORKER_COUNT
) as executor:
with (
progress as p,
concurrent.futures.ThreadPoolExecutor(
max_workers=_FETCH_MULTIPLE_FROM_S3_WORKER_COUNT
) as executor,
):
futures = [executor.submit(fetch_from_s3, *args) for args in s3_urls_and_paths]
for future in concurrent.futures.as_completed(futures):
future.result() # Raises any exception that occurred in the worker thread.
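The rewritten `with` statement uses parenthesized context managers, a syntax officially supported from Python 3.10, consistent with the `target-version = "py310"` set in pyproject.toml below. A standalone sketch of the form:

```python
from contextlib import nullcontext

# Python 3.10+: several context managers grouped in parentheses,
# trailing comma allowed, no backslash continuations needed.
with (
    nullcontext("first") as a,
    nullcontext("second") as b,
):
    print(a, b)  # prints: first second
```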
1 change: 1 addition & 0 deletions kart/socket_utils.py
@@ -4,6 +4,7 @@
# Setting the message length higher than this has no effect - the message gets chunked by TCP anyway.
MAX_CHUNK_LEN = 8164


# Function modified from https://docs.python.org/3/library/socket.html#socket.socket.recvmsg
def recv_json_and_fds(sock, maxfds=0):
chunks = []
6 changes: 3 additions & 3 deletions kart/spatial_filter/__init__.py
@@ -277,9 +277,9 @@ def write_config(self, repo, update_remote=None):
repo.del_config(self.REF_KEY)
repo.del_config(self.OID_KEY)
if update_remote:
repo.config[
f"remote.{update_remote}.partialclonefilter"
] = self.partial_clone_filter_spec(specify_in_full=False)
repo.config[f"remote.{update_remote}.partialclonefilter"] = (
self.partial_clone_filter_spec(specify_in_full=False)
)

def delete_all_config(self, repo, update_remote=None):
for key in (self.GEOM_KEY, self.CRS_KEY, self.REF_KEY, self.OID_KEY):
1 change: 0 additions & 1 deletion kart/spatial_filter/index.py
@@ -666,7 +666,6 @@ def _get_envelope_for_indexing_verbose(geom, transforms, feature_desc):
click.echo(f"Geometry envelope: {minmax_envelope}")

for transform in transforms:

desc = getattr(transform, "desc") or str(transform)
click.echo()
click.echo(f"Applying transform {desc}...")
15 changes: 9 additions & 6 deletions kart/sqlalchemy/adapter/gpkg.py
@@ -144,15 +144,18 @@ def all_gpkg_meta_items(cls, v2_obj, table_name):
"""Generate all the gpkg_meta_items from the given v2 object (eg dataset)."""
yield "sqlite_table_info", cls.generate_sqlite_table_info(v2_obj)
yield "gpkg_contents", cls.generate_gpkg_contents(v2_obj, table_name)
yield "gpkg_geometry_columns", cls.generate_gpkg_geometry_columns(
v2_obj, table_name
yield (
"gpkg_geometry_columns",
cls.generate_gpkg_geometry_columns(v2_obj, table_name),
)
yield "gpkg_spatial_ref_sys", cls.generate_gpkg_spatial_ref_sys(v2_obj)
yield "gpkg_metadata", cls.generate_gpkg_metadata(
v2_obj, table_name, reference=False
yield (
"gpkg_metadata",
cls.generate_gpkg_metadata(v2_obj, table_name, reference=False),
)
yield "gpkg_metadata_reference", cls.generate_gpkg_metadata(
v2_obj, table_name, reference=True
yield (
"gpkg_metadata_reference",
cls.generate_gpkg_metadata(v2_obj, table_name, reference=True),
)

@classmethod
7 changes: 5 additions & 2 deletions kart/sqlalchemy/adapter/sqlserver.py
@@ -245,8 +245,11 @@ def all_v2_meta_items_including_empty(
if not auth_name and not auth_code:
auth_name, auth_code = "CUSTOM", crs_info["srid"]
wkt = crs_info["well_known_text"] or ""
yield f"crs/{auth_name}:{auth_code}.wkt", crs_util.normalise_wkt(
crs_util.ensure_authority_specified(wkt, auth_name, auth_code)
yield (
f"crs/{auth_name}:{auth_code}.wkt",
crs_util.normalise_wkt(
crs_util.ensure_authority_specified(wkt, auth_name, auth_code)
),
)

@classmethod
2 changes: 1 addition & 1 deletion kart/tabular/export.py
@@ -65,7 +65,7 @@ def get_driver(destination_spec):
Given a destination like target.gpkg, GPKG:target.gpkg, postgresql://x/y/z,
returns the driver to use and the target that the driver should write to.
"""
match = re.match(r'([^:]+)://', destination_spec)
match = re.match(r"([^:]+)://", destination_spec)
if match:
return get_driver_by_shortname(match.group(1)), destination_spec
if ":" in destination_spec:
1 change: 0 additions & 1 deletion kart/tabular/pk_generation.py
@@ -253,7 +253,6 @@ def features(self):
next_new_pk += 1

if buffered_inserts:

# Look for matching inserts-deletes - reassign the PK from the delete, treat is as an edit:
yield from self._match_similar_features_and_remove(
self._find_deleted_features(hash_to_unassigned_pks), buffered_inserts
7 changes: 4 additions & 3 deletions kart/tabular/v3.py
@@ -349,9 +349,10 @@ def import_iter_feature_blobs(
)
if existing_feature == feature:
# Nothing changed? No need to rewrite the feature blob
yield self.encode_pks_to_path(
pk_values, schema=schema
), existing_data
yield (
self.encode_pks_to_path(pk_values, schema=schema),
existing_data,
)
else:
yield self.encode_feature(feature, schema)
else:
2 changes: 1 addition & 1 deletion kart/tabular/working_copy/base.py
@@ -46,7 +46,7 @@ class TableWorkingCopy(WorkingCopyPart):
SUPPORTED_META_ITEMS = (
meta_items.TITLE,
meta_items.SCHEMA_JSON,
meta_items.CRS_DEFINITIONS
meta_items.CRS_DEFINITIONS,
# Not description, not metadata.xml, except where overridden by a subclass.
)

22 changes: 19 additions & 3 deletions pyproject.toml
@@ -1,5 +1,21 @@
[tool.black]
target-version = ['py37']

[tool.pyright]
exclude = ["**/__pycache__", "**/build", "venv"]


[tool.ruff]
line-length = 88
target-version = "py310"
force-exclude = true

[tool.ruff.lint]
preview = true
select = [
"F402",
"F404",
"F823",
"F821",
"F822",
"E112",
"E113",
"E902",
]
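The `select` list opts in to a handful of high-signal checks rather than whole rule families: the F-codes are pyflakes-derived correctness rules (F821 undefined name, F822 undefined name in `__all__`, F823 local variable referenced before assignment, F402 import shadowed by a loop variable, F404 late `from __future__` import), while E112/E113 flag broken indentation and E902 surfaces files that cannot be read or parsed. As a hypothetical illustration, a file like this would fail the lint step:

```python
# F821: `prices` is never defined, so `ruff check` exits non-zero
# under the rule selection above.
def total_cost(items):
    return sum(prices)
```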
1 change: 0 additions & 1 deletion tests/scripts/history-creator.py
@@ -273,7 +273,6 @@ def evolve(typ, old):
t0 = time.monotonic()
try:
for i in range(options.commits or 1):

with wc.session() as sess:
for table in tables:
update(sess, table, num_updates)
2 changes: 1 addition & 1 deletion tests/test_e2e.py
@@ -97,7 +97,7 @@ def test_e2e(
r = cli_runner.invoke(["diff"])
assert r.exit_code == 0
line = r.stdout.splitlines()[0]
assert re.match(fr"\+\+\+ {table}:feature:\d+$", line), line
assert re.match(rf"\+\+\+ {table}:feature:\d+$", line), line

# commit it
r = cli_runner.invoke(["commit", "-m", "commit-1"])
2 changes: 1 addition & 1 deletion tests/test_e2e_sno.py
@@ -105,7 +105,7 @@ def test_e2e_sno(
r = cli_runner.invoke(["diff"])
assert r.exit_code == 0
line = r.stdout.splitlines()[0]
assert re.match(fr"\+\+\+ {table}:feature:\d+$", line), line
assert re.match(rf"\+\+\+ {table}:feature:\d+$", line), line

# commit it
r = cli_runner.invoke(["commit", "-m", "commit-1"])
32 changes: 16 additions & 16 deletions tests/test_export.py
@@ -135,11 +135,11 @@ def test_layer_creation_options(data_archive, cli_runner):
col_names = [row[1] for row in r]
# FID column has been named id, geometry column has been named geometry.
assert col_names == [
'id',
'geometry',
'date_adjusted',
'survey_reference',
'adjusted_nodes',
"id",
"geometry",
"date_adjusted",
"survey_reference",
"adjusted_nodes",
]


@@ -182,21 +182,21 @@ def test_primary_key_as_field(field_option, data_archive, cli_runner):
# So --primary-key-as-field means that we end up with two id columns, FID and ID.
# This is redundant here but would be useful if the PK column had string values.
assert col_names == [
'fid',
'geom',
'id',
'date_adjusted',
'survey_reference',
'adjusted_nodes',
"fid",
"geom",
"id",
"date_adjusted",
"survey_reference",
"adjusted_nodes",
]
else:
# Using --no-primary-key-as-field gets rid of the redundant column here.
assert col_names == [
'fid',
'geom',
'date_adjusted',
'survey_reference',
'adjusted_nodes',
"fid",
"geom",
"date_adjusted",
"survey_reference",
"adjusted_nodes",
]

# Both the fid column and the id column (if present) should contain the primary key values.
1 change: 0 additions & 1 deletion tests/test_init.py
@@ -912,7 +912,6 @@ def test_init_import_alt_names(data_archive, tmp_path, cli_runner, chdir):
with chdir(repo_path):
# working copy exists
with Db_GPKG.create_engine("wc.gpkg").connect() as conn:

expected_tables = set(a[3].replace("/", "__") for a in ARCHIVE_PATHS)
r = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
db_tables = set(row[0] for row in r)