Skip to content

Commit

Permalink
Refactor: modify schema class to be more generic.
Browse files Browse the repository at this point in the history
Tabular schemas look something like this:
[{
  "id": "deadbeef",
  "name": "fid",
  "dataType": "integer",
  "primaryKeyIndex": 0,
  "size": 64}, ...
]

Point cloud schemas look something like this:
[{"name": "Red", "dataType": "integer", "size": 32}, ...]

Raster schemas will look different again.

Removing the requirement that schemas themselves follow a particular schema,
and doing away with the two different internal representations of schemas:
the list of dicts (json-dumpable) and the Schema object (not json-dumpable).
With the magic of python, it's possible to make the Schema object *be* a
list of dicts (actually a tuple) and be JSON dumpable, so we don't need
to convert between the two all the time.
  • Loading branch information
olsen232 committed Feb 1, 2023
1 parent e96b4c0 commit 7ffdadb
Show file tree
Hide file tree
Showing 29 changed files with 448 additions and 313 deletions.
15 changes: 11 additions & 4 deletions kart/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,16 @@ def convert_delta(delta):


def parse_meta_diff(meta_diff_input, allow_minimal_updates=False):
def convert_delta(delta):
if delta.old_key == "schema.json" or delta.new_key == "schema.json":
return Schema.schema_delta_from_raw_delta(delta)
return delta

return DeltaDiff(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
convert_delta(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
)
)
for (k, v) in meta_diff_input.items()
)
Expand All @@ -229,9 +236,9 @@ def parse_feature_diff(

schema_delta = meta_diff.get("schema.json") if meta_diff else None
if schema_delta and schema_delta.old_value:
old_schema = Schema.from_column_dicts(schema_delta.old_value)
old_schema = schema_delta.old_value
if schema_delta and schema_delta.new_value:
new_schema = Schema.from_column_dicts(schema_delta.new_value)
new_schema = schema_delta.new_value

delta_parser = FeatureDeltaParser(
old_schema,
Expand Down
6 changes: 2 additions & 4 deletions kart/base_diff_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,14 +452,12 @@ def _get_delta_fetcher(self, ds_path):
return NullDeltaFetcher(ds_path, dataset.DATASET_TYPE)

def _get_old_and_new_schema(self, ds_path, ds_diff):
from kart.schema import Schema

old_schema = new_schema = None
schema_delta = ds_diff.recursive_get(["meta", "schema.json"])
if schema_delta and schema_delta.old_value:
old_schema = Schema.from_column_dicts(schema_delta.old_value)
old_schema = schema_delta.old_value
if schema_delta and schema_delta.new_value:
new_schema = Schema.from_column_dicts(schema_delta.new_value)
new_schema = schema_delta.new_value
if old_schema or new_schema:
return old_schema, new_schema

Expand Down
2 changes: 1 addition & 1 deletion kart/crs_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def get_identifier_int_from_dataset(dataset, crs_name=None):
if num_geom_columns == 0:
return None
elif num_geom_columns == 1:
crs_name = geom_columns[0].extra_type_info.get("geometryCRS", None)
crs_name = geom_columns[0].get("geometryCRS", None)
else:
raise ValueError("Dataset has more than one geometry column")

Expand Down
6 changes: 3 additions & 3 deletions kart/meta_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def assert_list_of_strings(self, meta_item):
assert isinstance(meta_item, list)
for tag in meta_item:
assert isinstance(tag, str)
except AssertionError as e:
except AssertionError:
raise AssertionError("tags.json should be a list of strings")
return meta_item

Expand All @@ -37,13 +37,13 @@ class SchemaJsonFileType:
def decode_from_bytes(self, data):
if data is None:
return None
return Schema.normalise_column_dicts(json_unpack(data))
return Schema.loads(data)

def encode_to_bytes(self, meta_item):
if meta_item is None:
return None
if not isinstance(meta_item, Schema):
meta_item = Schema.from_column_dicts(meta_item)
meta_item = Schema(meta_item)
return meta_item.dumps()


Expand Down
24 changes: 14 additions & 10 deletions kart/point_cloud/schema_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from kart.exceptions import NotYetImplemented
from kart.schema import Schema

# Utility functions for dealing with Point Cloud schemas.

Expand Down Expand Up @@ -48,13 +49,16 @@


PDRF_TO_SCHEMA = {
0: PDRF0_SCHEMA + [],
1: PDRF0_SCHEMA + [GPS_TIME],
2: PDRF0_SCHEMA + RED_GREEN_BLUE,
3: PDRF0_SCHEMA + [GPS_TIME] + RED_GREEN_BLUE,
6: PDRF6_SCHEMA + [],
7: PDRF6_SCHEMA + RED_GREEN_BLUE,
8: PDRF6_SCHEMA + RED_GREEN_BLUE + [INFRARED],
k: Schema(v)
for k, v in {
0: PDRF0_SCHEMA + [],
1: PDRF0_SCHEMA + [GPS_TIME],
2: PDRF0_SCHEMA + RED_GREEN_BLUE,
3: PDRF0_SCHEMA + [GPS_TIME] + RED_GREEN_BLUE,
6: PDRF6_SCHEMA + [],
7: PDRF6_SCHEMA + RED_GREEN_BLUE,
8: PDRF6_SCHEMA + RED_GREEN_BLUE + [INFRARED],
}.items()
}

PDRF_TO_RECORD_LENGTH = {
Expand Down Expand Up @@ -114,9 +118,9 @@ def pdal_schema_to_kart_schema(pdal_schema):
Given the JSON schema as PDAL loaded it, format it as a Kart compatible schema.json item.
Eg "type" -> "dataType", size is measured in bits.
"""
return [
_pdal_col_schema_to_kart_col_schema(col) for col in pdal_schema["dimensions"]
]
return Schema(
[_pdal_col_schema_to_kart_col_schema(col) for col in pdal_schema["dimensions"]]
)


def _pdal_col_schema_to_kart_col_schema(pdal_col_schema):
Expand Down
Loading

0 comments on commit 7ffdadb

Please sign in to comment.