From e3f313c37d4b6141a74b1416f7ac1d503e01a4a8 Mon Sep 17 00:00:00 2001
From: hboisgon <45457510+hboisgon@users.noreply.github.com>
Date: Tue, 19 Dec 2023 21:08:18 +0800
Subject: [PATCH] Marina pilot (#209)

* local data rivers and reservoirs + bugfixes

* linting
---
 docs/changelog.rst                     |  5 +++
 hydromt_wflow/wflow.py                 | 43 ++++++++++++-----------
 hydromt_wflow/workflows/river.py       | 47 ++++++++++++++------------
 hydromt_wflow/workflows/waterbodies.py | 10 ++++--
 4 files changed, 62 insertions(+), 43 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 640fc173..9c6d0ca8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -11,12 +11,17 @@ Unreleased
 
 Added
 -----
+- **setup_rivers**: Add river depth based on rivdph columns in river_geom_fn rather than only computed from qbankfull column.
 
 Changed
 -------
 
 Fixed
 -----
+- **setup_reservoirs**: Fix error if optional columns 'Capacity_norm', 'Capacity_min', 'xout', 'yout' are not in reservoir_fn. Allow to pass kwargs to the get_data method.
+- **setup_lulcmaps**: Fix error when looking for mapping_fn in self.data_catalog
+- **setup_config_output_timeseries**: bugfix for reducer.
+- remove or update calls to check if source in self.data_catalog `Issue #501 <https://github.com/Deltares/hydromt/issues/501>`_
 
 Deprecated
 ----------
diff --git a/hydromt_wflow/wflow.py b/hydromt_wflow/wflow.py
index 08639f06..387adba0 100644
--- a/hydromt_wflow/wflow.py
+++ b/hydromt_wflow/wflow.py
@@ -912,9 +912,6 @@ def setup_laimaps(self, lai_fn="modis_lai"):
 
             * Required variables: ['LAI']
         """
-        if lai_fn not in self.data_catalog:
-            self.logger.warning(f"Invalid source '{lai_fn}', skipping setup_laimaps.")
-            return
         # retrieve data for region
         self.logger.info("Preparing LAI maps.")
         da = self.data_catalog.get_rasterdataset(lai_fn, geom=self.region, buffer=2)
@@ -997,7 +994,7 @@ def setup_config_output_timeseries(
                     "parameter": param[o],
                 }
                 if reducer is not None:
-                    gauge_toml_dict["reducer"]: reducer[o]
+                    gauge_toml_dict["reducer"] = reducer[o]
                 # If the gauge column/variable already exists skip writting twice
                 if gauge_toml_dict not in self.config[toml_output][var_name]:
                     self.config[toml_output][var_name].append(gauge_toml_dict)
@@ -1334,10 +1331,6 @@ def setup_areamap(
             Nodata value to use when rasterizing. Should match the dtype of col2raster.
             By default -1.
         """
-        if area_fn not in self.data_catalog:
-            self.logger.warning(f"Invalid source '{area_fn}', skipping setup_areamap.")
-            return
-
         self.logger.info(f"Preparing '{col2raster}' map from '{area_fn}'.")
         gdf_org = self.data_catalog.get_geodataframe(
             area_fn, geom=self.basins, dst_crs=self.crs
@@ -1362,6 +1355,7 @@ def setup_lakes(
         rating_curve_fns: List[Union[str, Path]] = None,
         min_area: float = 10.0,
         add_maxstorage: bool = False,
+        **kwargs,
     ):
         """Generate maps of lake areas and outlets.
 
@@ -1421,9 +1415,14 @@ def setup_lakes(
             If True, maximum storage of the lake is added to the output
             (controlled lake) based on 'Vol_max' [m3] column of lakes_fn.
             By default False (natural lake).
+        kwargs: optional
+            Keyword arguments passed to the method
+            hydromt.DataCatalog.get_rasterdataset()
         """
         # Derive lake are and outlet maps
-        gdf_org, ds_lakes = self._setup_waterbodies(lakes_fn, "lake", min_area)
+        gdf_org, ds_lakes = self._setup_waterbodies(
+            lakes_fn, "lake", min_area, **kwargs
+        )
         if ds_lakes is None:
             return
         rmdict = {k: v for k, v in self._MAPS.items() if k in ds_lakes.data_vars}
@@ -1577,6 +1576,9 @@ def setup_reservoirs(
             JRC 'jrc' (using hydroengine package). By default None.
         min_area : float, optional
             Minimum reservoir area threshold [km2], by default 1.0 km2.
+        kwargs: optional
+            Keyword arguments passed to the method
+            hydromt.DataCatalog.get_rasterdataset()
 
         """
         # rename to wflow naming convention
@@ -1603,7 +1605,9 @@ def setup_reservoirs(
             "input.lateral.river.reservoir.targetminfrac": "ResTargetMinFrac",
         }
 
-        gdf_org, ds_res = self._setup_waterbodies(reservoirs_fn, "reservoir", min_area)
+        gdf_org, ds_res = self._setup_waterbodies(
+            reservoirs_fn, "reservoir", min_area, **kwargs
+        )
         # TODO: check if there are missing values in the above columns of
         # the parameters tbls =
         # if everything is present, skip calculate reservoirattrs() and
@@ -1674,15 +1678,17 @@ def setup_reservoirs(
             for option in res_toml:
                 self.set_config(option, res_toml[option])
 
-    def _setup_waterbodies(self, waterbodies_fn, wb_type, min_area=0.0):
+    def _setup_waterbodies(self, waterbodies_fn, wb_type, min_area=0.0, **kwargs):
         """Help with common workflow of setup_lakes and setup_reservoir.
 
         See specific methods for more info about the arguments.
         """
         # retrieve data for basin
         self.logger.info(f"Preparing {wb_type} maps.")
+        if "predicate" not in kwargs:
+            kwargs.update(predicate="contains")
         gdf_org = self.data_catalog.get_geodataframe(
-            waterbodies_fn, geom=self.basins, predicate="contains"
+            waterbodies_fn, geom=self.basins, **kwargs
         )
         # skip small size waterbodies
         if "Area_avg" in gdf_org.columns and gdf_org.geometry.size > 0:
@@ -1715,10 +1721,9 @@ def _setup_waterbodies(self, waterbodies_fn, wb_type, min_area=0.0):
                 uparea_name=uparea_name,
                 logger=self.logger,
             )
-            # update xout and yout in gdf_org from gdf_wateroutlet:
-            if "xout" in gdf_org.columns and "yout" in gdf_org.columns:
-                gdf_org.loc[:, "xout"] = gdf_wateroutlet["xout"]
-                gdf_org.loc[:, "yout"] = gdf_wateroutlet["yout"]
+            # update/replace xout and yout in gdf_org from gdf_wateroutlet:
+            gdf_org["xout"] = gdf_wateroutlet["xout"]
+            gdf_org["yout"] = gdf_wateroutlet["yout"]
 
         else:
             self.logger.warning(
@@ -2016,12 +2021,12 @@ def setup_precip_forcing(
         Parameters
         ----------
         precip_fn : str, default era5
-            Precipitation data source, see data/forcing_sources.yml.
+            Precipitation data source.
 
             * Required variable: ['precip']
         precip_clim_fn : str, default None
             High resolution climatology precipitation data source to correct
-            precipitation, see data/forcing_sources.yml.
+            precipitation.
 
             * Required variable: ['precip']
         chunksize: int, optional
@@ -2029,8 +2034,6 @@ def setup_precip_forcing(
             If None the data chunksize is used, this can however be optimized for
             large/small catchments. By default None.
         """
-        if precip_fn is None:
-            return
         starttime = self.get_config("starttime")
         endtime = self.get_config("endtime")
         freq = pd.to_timedelta(self.get_config("timestepsecs"), unit="s")
diff --git a/hydromt_wflow/workflows/river.py b/hydromt_wflow/workflows/river.py
index ea83158a..355ea00a 100644
--- a/hydromt_wflow/workflows/river.py
+++ b/hydromt_wflow/workflows/river.py
@@ -193,7 +193,7 @@ def river_bathymetry(
     ds_model : xr.Dataset
         Model dataset with 'flwdir', 'rivmsk', 'rivlen', 'x_out' and 'y_out' variables.
     gdf_riv : gpd.GeoDataFrame
-        River geometry with 'rivwth' and 'qbankfull' columns.
+        River geometry with 'rivwth' and 'rivdph' or 'qbankfull' columns.
     method : {'gvf', 'manning', 'powlaw'}
         see py:meth:`hydromt.workflows.river_depth` for details, by default "powlaw"
     smooth_len : float, optional
@@ -222,7 +222,7 @@ def river_bathymetry(
     rivlen_avg = ds_model["rivlen"].values[riv_mask].mean()
 
     ## river width and bunkfull discharge
-    vars0 = ["rivwth", "qbankfull"]
+    vars0 = ["rivwth", "rivdph", "qbankfull"]
     # find nearest values from river shape if provided
     # if None assume the data is in ds_model
     if gdf_riv is not None:
@@ -259,11 +259,13 @@ def river_bathymetry(
                 dst_nn < max_dist, gdf_riv.loc[idx_nn, name].fillna(-9999).values, -9999
             )
             ds_model[name] = xr.Variable(dims, data, attrs=dict(_FillValue=-9999))
-    # TODO fallback option when qbankfull is missing.
-    assert "qbankfull" in ds_model
+    else:
+        vars = vars0
     assert "rivwth" in ds_model
+    assert "qbankfull" in ds_model or "rivdph" in ds_model
+
     # fill gaps in data using downward filling along flow directions
-    for name in vars0:
+    for name in vars:
         data = ds_model[name].values
         nodata = ds_model[name].raster.nodata
         if np.all(data[riv_mask] != nodata):
@@ -282,22 +284,25 @@ def river_bathymetry(
     )
 
     ## river depth
-    # distance to outlet; required for manning and gvf rivdph methods
-    if method != "powlaw" and "rivdst" not in ds_model:
-        rivlen = ds_model["rivlen"].values
-        nodata = ds_model["rivlen"].raster.nodata
-        rivdst = flwdir_river.accuflux(rivlen, nodata=nodata, direction="down")
-        ds_model["rivdst"] = xr.Variable(dims, rivdst, attrs=dict(_FillValue=nodata))
-    # add river distance to outlet -> required for manning/gvf method
-    rivdph = workflows.river_depth(
-        data=ds_model,
-        flwdir=flwdir_river,
-        method=method,
-        min_rivdph=min_rivdph,
-        **kwargs,
-    )
-    attrs = dict(_FillValue=-9999, unit="m")
-    ds_model["rivdph"] = xr.Variable(dims, rivdph, attrs=attrs).fillna(-9999)
+    if "rivdph" not in ds_model:
+        # distance to outlet; required for manning and gvf rivdph methods
+        if method != "powlaw" and "rivdst" not in ds_model:
+            rivlen = ds_model["rivlen"].values
+            nodata = ds_model["rivlen"].raster.nodata
+            rivdst = flwdir_river.accuflux(rivlen, nodata=nodata, direction="down")
+            ds_model["rivdst"] = xr.Variable(
+                dims, rivdst, attrs=dict(_FillValue=nodata)
+            )
+        # add river distance to outlet -> required for manning/gvf method
+        rivdph = workflows.river_depth(
+            data=ds_model,
+            flwdir=flwdir_river,
+            method=method,
+            min_rivdph=min_rivdph,
+            **kwargs,
+        )
+        attrs = dict(_FillValue=-9999, unit="m")
+        ds_model["rivdph"] = xr.Variable(dims, rivdph, attrs=attrs).fillna(-9999)
     # smooth by averaging along flow directions and set minimum
     if smooth_len > 0:
         ds_model["rivdph"].values = flwdir_river.moving_average(
diff --git a/hydromt_wflow/workflows/waterbodies.py b/hydromt_wflow/workflows/waterbodies.py
index 14786774..9385e65d 100644
--- a/hydromt_wflow/workflows/waterbodies.py
+++ b/hydromt_wflow/workflows/waterbodies.py
@@ -355,9 +355,15 @@ def reservoirattrs(gdf, timeseries_fn=None, perc_norm=50, perc_min=20, logger=lo
         max_area = np.nanmax([df_out["resarea"].iloc[i], 0.0])
         max_cap = np.nanmax([df_out["resmaxvolume"].iloc[i], 0.0])
         norm_area = np.nanmax([df_EO["normarea"].iloc[i], 0.0])
-        norm_cap = np.nanmax([gdf["Capacity_norm"].iloc[i], 0.0])
+        if "Capacity_norm" in gdf.columns:
+            norm_cap = np.nanmax([gdf["Capacity_norm"].iloc[i], 0.0])
+        else:
+            norm_cap = 0.0
         min_area = np.nanmax([df_EO["minarea"].iloc[i], 0.0])
-        min_cap = np.nanmax([gdf["Capacity_min"].iloc[i], 0.0])
+        if "Capacity_min" in gdf.columns:
+            min_cap = np.nanmax([gdf["Capacity_min"].iloc[i], 0.0])
+        else:
+            min_cap = 0.0
         mv = 0.0
 
         # Maximum level