update: (wip) working for dem support

ozekik · Oct 16, 2024 · 4c7b652 · 4c7b652
1 parent f55ed12
commit 4c7b652
Show file tree

Hide file tree

Showing 10 changed files with 514 additions and 119 deletions.
diff --git a/plateaukit/cli/commands/prebuild.py b/plateaukit/cli/commands/prebuild.py
@@ -22,7 +22,7 @@
     "-t",
     "types",
     type=click.Choice(
-        ["bldg", "brid", "tran"],
+        ["bldg", "brid", "tran", "dem"],
         case_sensitive=True,
     ),
     default=["bldg"],

diff --git a/plateaukit/core/dataset/_dataset.py b/plateaukit/core/dataset/_dataset.py
@@ -13,7 +13,7 @@
     get_area,
 )
 from plateaukit.core.dataset._to_cityjson import to_cityjson
-from plateaukit.core.dataset._to_geojson import to_geojson
+from plateaukit.core.dataset._to_geojson import dem_to_geojson, to_geojson
 
 try:
     from pyogrio import read_dataframe
@@ -236,6 +236,36 @@ def to_geojson(
             # split=split,
         )
 
+    def dem_to_geojson(
+        self,
+        outfile: str | PathLike,
+        *,
+        seq: bool = False,
+        split: int = 1,
+        selection: list[str] | None = None,
+        target_epsg: int | None = 4326,
+        target_reduction: float | None = None,
+        progress_messages: dict | None = None,
+    ):
+        """Export GeoJSON from PLATEAU dataset DEMs.
+
+        Args:
+            outfile: Output file path.
+            split: Split the output into specified number of files.
+            **kwargs: Keyword arguments for the generator.
+        """
+
+        return dem_to_geojson(
+            self,
+            outfile,
+            seq=seq,
+            split=split,
+            selection=selection,
+            target_epsg=target_epsg,
+            target_reduction=target_reduction,
+            progress_messages=progress_messages,
+        )
+
     def to_cityjson(
         self,
         outfile: str | PathLike,

diff --git a/plateaukit/core/dataset/_to_geojson.py b/plateaukit/core/dataset/_to_geojson.py
@@ -9,6 +9,7 @@
 from plateaukit.logger import logger
 from plateaukit.readers.citygml.reader import CityGMLReader
 from plateaukit.transformers.reprojection import ReprojectionTransformer
+from plateaukit.transformers.simplify import SimplifyTransformer
 
 
 def to_geojson(
@@ -237,3 +238,109 @@ def to_geojson(
     #     codelist_infiles=codelist_infiles,
     #     **kwargs,
     # )
+
+
+def dem_to_geojson(
+    self,
+    outfile: str | PathLike,
+    *,
+    seq=False,
+    split: int = 1,
+    selection: list[str] | None = None,
+    target_epsg: int | None = None,
+    target_reduction: float | None = None,
+    progress_messages: dict | None = None,
+    **kwargs,
+):
+    """Export GeoJSON from PLATEAU dataset DEMs.
+
+    Args:
+        outfile: Output file path.
+        types: CityGML object types to include.
+        split: Split the output into specified number of files.
+        seq: Export GeoJSONSeq.
+        **kwargs: Keyword arguments for the generator.
+    """
+    if not self.dataset_id:
+        raise Exception("Missing dataset_id")
+
+    # NOTE: this is intentional but to be refactored in the future
+    config = Config()
+    record = config.datasets[self.dataset_id]
+
+    if "citygml" not in record:
+        raise Exception("Missing CityGML data")
+
+    file_path = Path(record["citygml"])
+
+    infiles = []
+
+    # TODO: Refactor
+    type = "dem"
+
+    # TODO: Fix
+    pat = re.compile(rf".*udx\/{type}\/.*\.gml$")
+
+    if zipfile.is_zipfile(file_path):
+        with zipfile.ZipFile(file_path) as f:
+            namelist = f.namelist()
+            targets = list(filter(lambda x: pat.match(x), namelist))
+
+            if not targets:
+                raise RuntimeError(
+                    f"Data type '{type}' not found in '{self.dataset_id}'"
+                )
+
+            # NOTE: zipfs requires POSIX path
+            infiles += [str(PurePosixPath("/", target)) for target in targets]
+    else:
+        infiles += [str(Path(file_path, "udx", type, "*.gml"))]
+
+    logger.debug([type, infiles, outfile])
+
+    # Sort by a part of filename to group by areas; TODO: Update this
+    # try:
+    #     infiles = sorted(infiles, key=lambda x: Path(x).stem.split("_")[0])
+    # except:
+    #     infiles = sorted(infiles)
+    infiles = sorted(infiles)
+
+    # Codelists
+    codelist_infiles = None
+    if not zipfile.is_zipfile(file_path):
+        # TODO: Test support for non-zip codelists
+        codelist_infiles = [str(Path(file_path, "codelists", "*.xml"))]
+
+    reader = CityGMLReader()
+
+    readable = reader.scan_files(
+        infiles,
+        codelist_infiles=codelist_infiles,
+        zipfile=file_path,
+        selection=selection,
+    )
+
+    # TODO: Fix typing
+    transformers: list = []
+
+    if target_reduction:
+        transformers += [
+            ReprojectionTransformer(target_epsg=3857),
+            SimplifyTransformer(target_reduction=target_reduction),
+        ]
+
+    if target_epsg:
+        transformers.append(ReprojectionTransformer(target_epsg=target_epsg))
+
+    for transformer in transformers:
+        readable = transformer.transform(readable)
+
+    parallel_writer = ParallelWriter(GeoJSONWriter)
+    parallel_writer.transform(
+        readable,
+        str(outfile),
+        seq=seq,
+        split=split,
+        progress_messages=progress_messages,
+        altitude=True,
+    )
diff --git a/plateaukit/prebuild/__init__.py b/plateaukit/prebuild/__init__.py
@@ -12,8 +12,9 @@
 from plateaukit.logger import logger
 
 from ._cityobjectsdb import _prebuild_cityobjects_db
+from ._dem import _prebuild_dem
 
-_supported_types = ["bldg", "brid", "tran"]
+_supported_types = ["bldg", "brid", "tran", "dem"]
 
 
 def prebuild(
@@ -51,6 +52,10 @@ def prebuild(
         logger.debug(f"Temporary directory: {tdir}")
 
         for type in types:
+            # NOTE: Skip DEM here
+            if type == "dem":
+                continue
+
             outfile_geojsonl = Path(tdir, f"{dataset_id}.{type}.geojsonl")
 
             dataset = load_dataset(dataset_id)
@@ -80,13 +85,25 @@ def prebuild(
             _prebuild_cityobjects_db(
                 dataset_id,
                 dest_path,
-                types=types,
+                types=[type for type in types if type != "dem"],
                 tmpdir=tdir,
                 split=split,
             )
 
             config.datasets[dataset_id]["cityobjects"] = dest_path
 
+        if "dem" in types:
+            dest_path = str(Path(config.data_dir, f"{dataset_id}.dem.parquet"))
+
+            _prebuild_dem(
+                dataset_id,
+                dest_path,
+                tmpdir=tdir,
+                split=split,
+            )
+
+            config.datasets[dataset_id]["dem"] = dest_path
+
         with console.status(f"Writing {display_name}...") as status:
             if format == "gpkg":
                 if types != ["bldg"]:
@@ -120,6 +137,9 @@ def prebuild(
                 dest_path_map = {}
 
                 for type in types:
+                    if type == "dem":
+                        continue
+
                     tmp_file_paths = []
 
                     for filename in glob.glob(str(Path(tdir, "*.geojsonl"))):

diff --git a/plateaukit/prebuild/_dem.py b/plateaukit/prebuild/_dem.py
@@ -0,0 +1,74 @@
+import glob
+from pathlib import Path
+
+import geopandas as gpd
+import pandas as pd
+
+from plateaukit.core.dataset import load_dataset
+
+
+def _prebuild_dem(
+    dataset_id: str,
+    outfile: str = "/tmp/dem.parquet",
+    tmpdir: str = "/tmp",
+    split: int = 10,
+):
+    dataset = load_dataset(dataset_id)
+
+    type = "dem"
+
+    outfile_geojsonl = Path(tmpdir, f"{dataset_id}.dem.geojsonl")
+    dataset.dem_to_geojson(
+        outfile_geojsonl,
+        seq=True,
+        split=split,
+        target_reduction=0.9,
+        progress_messages={"description": f"Generating GeoJSONSeq files: {type}"},
+    )
+
+    # decoder = msgspec.json.Decoder(dict)
+
+    if split > 1:
+        filename_pattern = str(Path(tmpdir, "*.dem.*.geojsonl"))
+    else:
+        filename_pattern = str(Path(tmpdir, "*.dem.geojsonl"))
+
+    tmp_file_paths = []
+
+    for filename in glob.glob(filename_pattern):
+        if f".{type}." not in filename:
+            continue
+
+        try:
+            df = gpd.read_file(filename)
+        except Exception:
+            raise RuntimeError(f"Failed to read {filename}")
+            # import shutil
+
+            # # copy dir to /tmp for debugging:
+            # shutil.copytree(tmpdir, "/tmp/failed")
+            # raise
+
+        df = df.drop(columns=["gmlId", "type"])
+        # Explode
+        df = df.explode(column="geometry")
+        tmp_dest_path = str(Path(tmpdir, f"{filename}.parquet"))
+        df.to_parquet(tmp_dest_path, index=False)
+
+        tmp_file_paths.append(tmp_dest_path)
+
+    df = None
+    for filename in tmp_file_paths:
+        subdf = gpd.read_parquet(filename)
+        # NOTE: set ignore_index True for re-indexing
+        df = (
+            pd.concat([df, subdf], ignore_index=True)
+            if df is not None
+            else subdf
+        )
+
+    if df is None:
+        raise RuntimeError("Data is empty")
+
+    df.to_parquet(outfile, compression="zstd")
+    # return df
diff --git a/plateaukit/readers/citygml/parsers/city_object_parser.py b/plateaukit/readers/citygml/parsers/city_object_parser.py
@@ -40,6 +40,7 @@ def __init__(
             "Building": f"{{{default_nsmap['bldg']}}}Building",
             "Road": f"{{{default_nsmap['tran']}}}Road",
             "Bridge": f"{{{default_nsmap['brid']}}}Bridge",
+            "Relief": f"{{{default_nsmap['dem']}}}ReliefFeature",
         }
 
         self.geometry_type_tags = [
@@ -63,6 +64,10 @@ def __init__(
                 "type": "lod2MultiSurface",
                 "tag": f"{{{default_nsmap['brid']}}}lod2MultiSurface",
             },
+            {
+                "type": "tin",
+                "tag": "dem:reliefComponent/dem:TINRelief/dem:tin",
+            },
         ]
 
 
@@ -158,6 +163,30 @@ def _get_geometry(self, root: etree._Element):
 
                 geoms.append(geom)
 
+            elif type in ["tin"]:
+                chunked_poslists = parser.extract_chunked_poslists(el, type="Triangle")
+
+                # NOTE: Boundaries should not be closed
+                chunked_poslists = [
+                    [bound[:3] for bound in surface] for surface in chunked_poslists
+                ]
+
+                geom = IRGeometry(
+                    type="CompositeSurface",
+                    lod="1",
+                    boundaries=chunked_poslists,
+                    semantics={
+                        "surfaces": [
+                            {
+                                "type": f"+{type}",
+                            }
+                        ],
+                        "values": [0 for _ in range(len(chunked_poslists))],
+                    },
+                )
+
+                geoms.append(geom)
+
         # NOTE: Process boundedBy for Building and Bridge
 
         if root.tag in [
@@ -302,6 +331,16 @@ def parse(self, element: etree._Element) -> IRCityObject:
                 attributes=attributes,
                 geometry=geometry,
             )
+        elif el.tree.tag == self.object_type_tags["Relief"]:
+            geometry = self._get_geometry(el.tree)
+            # print(geometry)
+
+            obj = IRCityObject(
+                type="TINRelief",
+                id=citygml_id,
+                attributes=attributes,
+                geometry=geometry,
+            )
         else:
             raise NotImplementedError(f"Unknown object type: {el.tree.tag}")
             # warnings.warn(f"Unknown object type: {el.tree.tag}")