From e7c536e41beb233b800e7f1c5ef4b6acbde37388 Mon Sep 17 00:00:00 2001
From: Brendan <brencodeert@outlook.com>
Date: Thu, 14 Dec 2023 14:50:25 +0100
Subject: [PATCH] Working allocation

---
 hydromt_wflow/wflow.py            |  82 +++++++++++++------
 hydromt_wflow/workflows/demand.py | 128 +++++++++++++++++++++++-------
 2 files changed, 159 insertions(+), 51 deletions(-)

diff --git a/hydromt_wflow/wflow.py b/hydromt_wflow/wflow.py
index 1dfb4681..e19ccc12 100644
--- a/hydromt_wflow/wflow.py
+++ b/hydromt_wflow/wflow.py
@@ -2492,15 +2492,56 @@ def setup_rootzoneclim(
         # update config
         self.set_config("input.vertical.rootingdepth", update_toml_rootingdepth)
 
-    def setup_water_demand(
+    def setup_allocation(
+        self,
+        min_area: float | int = 0,
+        admin_bounds_fn: str = "gadm",
+        admin_level: int = 0,
+    ):
+        """_summary_.
+
+        _extended_summary_
+
+        Parameters
+        ----------
+        min_area : float | int
+            _description_
+        admin_bounds_fn : str, optional
+            _description_, by default "gadm"
+        admin_level : int, optional
+            _description_, by default 0
+        """
+        # Will be fixes but for know this is done like this
+        # TODO fix in the future
+        admin_bounds = None
+        if admin_bounds_fn is not None:
+            admin_path = self.data_catalog[admin_bounds_fn].path
+            admin_bounds = self.data_catalog.get_geodataframe(
+                admin_path, geom=self.region, layer=f"level{admin_level}"
+            )
+            # Add this identifier for usage in the workflow
+            admin_bounds["admin_id"] = range(len(admin_bounds))
+
+        # Create the allocation grid
+        alloc = workflows.demand.allocate(
+            ds_like=self.grid,
+            min_area=min_area,
+            admin_bounds=admin_bounds,
+            basins=self.geoms["basins"],
+            rivers=self.geoms["rivers"],
+        )
+        self.set_grid(alloc)
+
+        # Update the settings toml
+        self.set_config("input.vertical.waterallocation.areas", "Allocation_id")
+
+    def setup_non_irigation(
         self,
         non_irigation_fn: str = "pcr_globwb",
         non_irigation_vars: list = ["dom", "ind", "lsk"],
         non_irigation_method: str = "nearest",
         population_fn: str = "worldpop_2020_constrained",
         population_method: str = "sum",
-        admin_bounds_fn: str = "gadm",
-        admin_level: int = 0,
     ):
         """_summary_.
 
@@ -2508,10 +2549,21 @@ def setup_water_demand(
 
         Parameters
         ----------
-        pcr_fn : str, optional
+        non_irigation_fn : str, optional
             _description_, by default "pcr_globwb"
-        pcr_vars : list, optional
+        non_irigation_vars : list, optional
             _description_, by default ["dom", "ind", "lsk"]
+        non_irigation_method : str, optional
+            _description_, by default "nearest"
+        population_fn : str, optional
+            _description_, by default "worldpop_2020_constrained"
+        population_method : str, optional
+            _description_, by default "sum"
+
+        Raises
+        ------
+        ValueError
+            _description_
         """
         if not all([item in ["dom", "ind", "lsk"] for item in non_irigation_vars]):
             raise ValueError("")
@@ -2543,28 +2595,12 @@ def setup_water_demand(
         self.set_grid(non_irigation)
         self.set_grid(popu)
 
-        # Will be fixes but for know this is done like this
-        # TODO fix in the future
-        admin_path = self.data_catalog[admin_bounds_fn].path
-        admin_bounds = self.data_catalog.get_geodataframe(
-            admin_path, geom=self.region, layer=f"level{admin_level}"
-        )
-
-        # Create the allocation grid
-        alloc = workflows.demand.allocate(
-            ds_like=self.grid,
-            admin_bounds=admin_bounds,
-            basins=self.geoms["basins"],
-            rivers=self.geoms["rivers"],
-        )
-        self.set_grid(alloc)
-
-        # Update the settings toml
+        # Update the settings toml with non irigation stuff
         for var in non_irigation.data_vars:
             sname, suffix = var.split("_")
             lname = workflows.demand.map_vars[sname]
             self.set_config(
-                f"vertical.{lname}.demand_{suffix}",
+                f"input.vertical.{lname}.demand_{suffix}",
                 var,
             )
 
diff --git a/hydromt_wflow/workflows/demand.py b/hydromt_wflow/workflows/demand.py
index 4c572c81..f4162493 100644
--- a/hydromt_wflow/workflows/demand.py
+++ b/hydromt_wflow/workflows/demand.py
@@ -2,6 +2,7 @@
 
 import math
 
+import pandas as pd
 import xarray as xr
 from affine import Affine
 from hydromt.raster import full_from_transform
@@ -37,6 +38,33 @@ def transform_half_degree(
     return affine, w, h
 
 
+def touch_intersect(row, vector):
+    """_summary_.
+
+    _extended_summary_
+
+    Parameters
+    ----------
+    row : _type_
+        _description_
+    vector : _type_
+        _description_
+
+    Returns
+    -------
+    _type_
+        _description_
+    """
+    contain = True
+    _t = sum(vector.touches(row.geometry))
+    _i = sum(vector.intersects(row.geometry))
+    diff = abs(_i - _t)
+    if diff == 0:
+        contain = False
+    row["contain"] = contain
+    return row
+
+
 def non_irigation(
     ds: xr.Dataset,
     ds_like: xr.Dataset,
@@ -128,6 +156,7 @@ def non_irigation(
 
 def allocate(
     ds_like: xr.Dataset,
+    min_area: float | int,
     admin_bounds: object,
     basins: xr.Dataset,
     rivers: xr.Dataset,
@@ -135,29 +164,49 @@ def allocate(
     """_summary_.
 
     _extended_summary_
+
+    Parameters
+    ----------
+    ds_like : xr.Dataset
+        _description_
+    min_area : float | int
+        _description_
+    admin_bounds : object
+        _description_
+    basins : xr.Dataset
+        _description_
+    rivers : xr.Dataset
+        _description_
     """
     # Split based on admin bounds
-    split_basins = basins.overlay(
-        admin_bounds,
-        how="union",
-    )
-    split_basins = split_basins[~split_basins["value"].isna()]
-
-    # Remove unneccessary stuff
-    cols = split_basins.columns.drop(["value", "geometry", "NAME_2"]).tolist()
-    split_basins.drop(cols, axis=1, inplace=True)
-    # Use this uid to dissolve on later
-    split_basins["uid"] = range(len(split_basins))
-
-    # Dissolve cut pieces back
-    for _, row in split_basins.iterrows():
-        if not str(row.NAME_2).lower() == "nan":
-            continue
-        touched = split_basins[split_basins.touches(row.geometry)]
-        uid = touched[touched["value"] == row.value].uid.values[0]
-        split_basins.loc[split_basins["uid"] == row.uid, "uid"] = uid
-    split_basins = split_basins.dissolve("uid", sort=False, as_index=False)
+    sub_basins = basins.copy()
+    sub_basins["uid"] = range(len(sub_basins))
 
+    if admin_bounds is not None:
+        sub_basins = basins.overlay(
+            admin_bounds,
+            how="union",
+        )
+        sub_basins = sub_basins[~sub_basins["value"].isna()]
+
+        # Remove unneccessary stuff
+        cols = sub_basins.columns.drop(["value", "geometry", "admin_id"]).tolist()
+        sub_basins.drop(cols, axis=1, inplace=True)
+        # Use this uid to dissolve on later
+        sub_basins["uid"] = range(len(sub_basins))
+
+        # Dissolve cut pieces back
+        for _, row in sub_basins.iterrows():
+            if not str(row.admin_id).lower() == "nan":
+                continue
+            touched = sub_basins[sub_basins.touches(row.geometry)]
+            uid = touched[touched["value"] == row.value].uid.values[0]
+            sub_basins.loc[sub_basins["uid"] == row.uid, "uid"] = uid
+        sub_basins = sub_basins.dissolve("uid", sort=False, as_index=False)
+
+    # Set the contain flag per geom
+    sub_basins = sub_basins.apply(lambda row: touch_intersect(row, rivers), axis=1)
+    sub_basins["sqkm"] = sub_basins.geometry.to_crs(3857).area / 1000**2
     _count = 0
 
     # Create touched and not touched by rivers datasets
@@ -168,13 +217,26 @@ def allocate(
 
         # Everything touched by river based on difference
         # (is not what we want, yet)
-        riv_touch = split_basins.sjoin(
-            rivers,
-        )
+        if _count != 0:
+            sub_basins = sub_basins.apply(
+                lambda row: touch_intersect(row, rivers), axis=1
+            )
+            sub_basins["sqkm"] = sub_basins.geometry.to_crs(3857).area / 1000**2
 
         # Set no_riv and riv (what's touched and what's not)
-        no_riv = split_basins[~split_basins.geometry.isin(riv_touch.geometry)]
-        riv = split_basins[split_basins.geometry.isin(riv_touch.geometry)]
+        no_riv = sub_basins[~sub_basins["contain"]]
+        riv = sub_basins[sub_basins["contain"]]
+
+        # Include minimal area option
+        min_basins = sub_basins[sub_basins["sqkm"] < min_area]
+        min_basins = min_basins[~min_basins.uid.isin(no_riv.uid)]
+        # Only concatenate if there are different small basins
+        # compared to basins that do not touch a river
+        if not min_basins.empty:
+            no_riv = pd.concat(
+                [no_riv, min_basins],
+                ignore_index=True,
+            )
 
         _n = 0
 
@@ -192,7 +254,7 @@ def allocate(
                 uid = touched[touched["area"] == touched["area"].max()].uid.values[0]
             # Set the identifier to the new value
             # (i.e. the touched basin)
-            split_basins.loc[split_basins["uid"] == row.uid, "uid"] = uid
+            sub_basins.loc[sub_basins["uid"] == row.uid, "uid"] = uid
             _n += 1
 
         # Ensure a break if nothing is touched
@@ -201,6 +263,16 @@ def allocate(
         if _n == 0:
             break
 
-        split_basins = split_basins.dissolve("uid", sort=False, as_index=False)
+        sub_basins = sub_basins.dissolve("uid", sort=False, as_index=False)
         _count += 1
-        pass
+
+    alloc = full_from_transform(
+        ds_like.raster.transform,
+        ds_like.raster.shape,
+        crs=ds_like.raster.crs,
+        lazy=True,
+    )
+    alloc = alloc.raster.rasterize(sub_basins, col_name="uid", nodata=-9999)
+    alloc.name = "Allocation_id"
+    alloc = alloc.rename(dict(zip(alloc.dims, list(ds_like.dims)[:2])))
+    return alloc