Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Saving data in folders fixes #233

Merged
merged 4 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
"Ground Floor Height": "found_ht",
"Ground Elevation": "ground_elv",
"geometry": "geometry",
"Aggregation Label: Census Block": "cbfips"
"Aggregation Label: Census Blockgroup": "cbfips"
}
95 changes: 64 additions & 31 deletions hydromt_fiat/fiat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pandas as pd
from hydromt.models.model_grid import GridModel
from shapely.geometry import box
import os
import os
import shutil

from hydromt_fiat import DATADIR
Expand Down Expand Up @@ -71,6 +71,10 @@ def __init__(
self.exposure = None
self.vulnerability = None
self.vf_ids_and_linking_df = pd.DataFrame()
self.additional_attributes_fn = (
"" # Path or paths to the additional attributes dataset(s)
)
self.building_footprint_fn = "" # Path to the building footprints dataset

def setup_global_settings(
self,
Expand Down Expand Up @@ -103,7 +107,6 @@ def setup_output(
output_dir: str = "output",
output_csv_name: str = "output.csv",
output_vector_name: Union[str, List[str]] = "spatial.gpkg",

) -> None:
"""Setup Delft-FIAT output folder and files.

Expand Down Expand Up @@ -427,7 +430,7 @@ def update_max_potential_damage(
method=method,
max_dist=max_dist,
)

def update_ground_elevation(
self,
ground_elevation: Union[int, float, None, str, Path],
Expand Down Expand Up @@ -503,7 +506,9 @@ def setup_hazard(
# read maps and retrieve their attributes
da_map_fn, da_name, da_type = read_maps(params, da_map_fn, idx)

da = self.data_catalog.get_rasterdataset(da_map_fn) # removed geom=self.region because it is not always there
da = self.data_catalog.get_rasterdataset(
da_map_fn
) # removed geom=self.region because it is not always there

# Convert to units of the exposure data if required
if (
Expand Down Expand Up @@ -755,47 +760,53 @@ def setup_equity_data(
equity.match_geo_ID()
equity.download_shp_geom(year_data, county_numbers)
equity.merge_equity_data_shp()
equity.clean()

self.set_tables(df=equity.equity_data_shp, name="equity_data")

# Save the census block aggregation area data
block_groups = equity.get_block_groups()
self.set_geoms(block_groups, "aggregation_areas/block_groups")

# Update the aggregation label: census block
del self.exposure.exposure_db["Aggregation Label: Census Blockgroup"]
self.setup_aggregation_areas(
aggregation_area_fn=block_groups,
attribute_names="GEOID_short",
label_names="Aggregation Label: Census Blockgroup",
)

def setup_aggregation_areas(
self,
aggregation_area_fn: Union[List[str], List[Path], str, Path],
aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame],
attribute_names: Union[List[str], str],
label_names: Union[List[str], str],
):
"""Join aggregation-area attributes onto the exposure data.

NOTE(review): this span is diff residue from a scraped pull-request page —
both the pre- and post-change signature lines and bodies appear
interleaved; only one of each exists in the merged file.

Parameters
----------
aggregation_area_fn : Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame]
Path(s) to, or in-memory GeoDataFrame(s) of, the aggregation area(s).
attribute_names : Union[List[str], str]
Name of the attribute(s) to join from the aggregation area data.
label_names : Union[List[str], str]
The name(s) that the new attribute(s) will get in the exposure data.
"""

# Join the aggregation attributes into the exposure database.
exposure_gdf = self.exposure.get_full_gdf(self.exposure.exposure_db)
self.exposure.exposure_db = join_exposure_aggregation_areas(
exposure_gdf, aggregation_area_fn, attribute_names, label_names
)

# Create additional attributes folder in root
additional_att_input = Path(self.root).joinpath("additional_attributes")
if not os.path.exists(additional_att_input):
os.makedirs(additional_att_input)

if isinstance(aggregation_area_fn,list):
for file in aggregation_area_fn:
shutil.copy2(file, additional_att_input)
else:
shutil.copy2(aggregation_area_fn, additional_att_input)

# Record the input so write() can copy it into the model root later;
# in-memory GeoDataFrames are skipped because only user-supplied files
# should be copied (not e.g. the generated census block groups).
if isinstance(aggregation_area_fn, list):
the_type = type(aggregation_area_fn[0])
else:
the_type = type(aggregation_area_fn)
if the_type != gpd.GeoDataFrame:
# This copies data from one location to the root folder for the FIAT
# model, only use user-input data here (not the census blocks)
self.additional_attributes_fn = aggregation_area_fn

def setup_building_footprint(
self,
Expand All @@ -822,15 +833,9 @@ def setup_building_footprint(
building_footprint_fn,
attribute_name,
)
# Create BF folder in Exposure
building_footprints_exp = Path(self.root).joinpath("exposure" , "building_footprints")
if not os.path.exists(building_footprints_exp):
os.makedirs(building_footprints_exp)
if isinstance(building_footprint_fn,list):
for file in building_footprint_fn:
shutil.copy2(file, building_footprints_exp)
else:
shutil.copy2(building_footprint_fn, building_footprints_exp)

# Set the building_footprint_fn property to save the building footprints
self.building_footprint_fn = building_footprint_fn

# Update functions
def update_all(self):
Expand Down Expand Up @@ -959,6 +964,34 @@ def write(self):
self.write_geoms(fn="exposure/{name}.gpkg", driver="GPKG")
if self._tables:
self.write_tables()
if self.additional_attributes_fn:
folder = Path(self.root).joinpath("additional_attributes")
self.copy_datasets(self.additional_attributes_fn, folder)
if self.building_footprint_fn:
folder = Path(self.root).joinpath("exposure", "building_footprints")
self.copy_datasets(self.building_footprint_fn, folder)

def copy_datasets(
    self, data: Union[list, str, Path], folder: Union[Path, str]
) -> None:
    """Copy one or more dataset files into ``folder``.

    Parameters
    ----------
    data : Union[list, str, Path]
        A single path, or a list of paths, to the file(s) to copy.
    folder : Union[Path, str]
        Destination folder; created if it does not yet exist.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(folder, exist_ok=True)

    if isinstance(data, list):
        for file in data:
            shutil.copy2(file, folder)
    elif isinstance(data, (str, Path)):
        # Single dataset; anything else (e.g. an in-memory GeoDataFrame)
        # is intentionally not copied.
        shutil.copy2(data, folder)

def write_tables(self) -> None:
if len(self._tables) == 0:
Expand Down
13 changes: 8 additions & 5 deletions hydromt_fiat/workflows/aggregation_areas.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def process_value(value):

def join_exposure_aggregation_multiple_areas(
exposure_gdf: gpd.GeoDataFrame,
aggregation_area_fn: Union[List[str], List[Path]],
aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame]],
attribute_names: List[str],
label_names: List[str],
) -> gpd.GeoDataFrame:
Expand All @@ -35,7 +35,10 @@ def join_exposure_aggregation_multiple_areas(
_description_
"""
for file_path, attribute_name, label_name in zip(aggregation_area_fn, attribute_names, label_names):
aggregation_gdf = gpd.read_file(file_path)
if isinstance(file_path, str) or isinstance(file_path, Path):
aggregation_gdf = gpd.read_file(file_path)
else:
aggregation_gdf = file_path

## check the projection of both gdf and if not match, reproject
if exposure_gdf.crs != aggregation_gdf.crs:
Expand All @@ -50,7 +53,7 @@ def join_exposure_aggregation_multiple_areas(
exposure_gdf = gpd.sjoin(
exposure_gdf,
aggregation_gdf[["geometry", attribute_name]],
op="intersects",
predicate="intersects",
how="left",
)

Expand All @@ -69,7 +72,7 @@ def join_exposure_aggregation_multiple_areas(

def join_exposure_aggregation_areas(
exposure_gdf: gpd.GeoDataFrame,
aggregation_area_fn: Union[List[str], List[Path], str, Path],
aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame],
attribute_names: Union[List[str], str],
label_names: Union[List[str], str],
) -> gpd.GeoDataFrame:
Expand All @@ -86,7 +89,7 @@ def join_exposure_aggregation_areas(
label_names : Union[List[str], str]
Name of the label(s) to join.
"""
if isinstance(aggregation_area_fn, str) or isinstance(aggregation_area_fn, Path):
if isinstance(aggregation_area_fn, str) or isinstance(aggregation_area_fn, Path) or isinstance(aggregation_area_fn, gpd.GeoDataFrame):
aggregation_area_fn = [aggregation_area_fn]
if isinstance(attribute_names, str):
attribute_names = [attribute_names]
Expand Down
22 changes: 18 additions & 4 deletions hydromt_fiat/workflows/equity_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from zipfile import ZipFile
from pathlib import Path
from typing import List
import shutil


class EquityData:
Expand All @@ -25,7 +26,7 @@ def __init__(self, data_catalog: DataCatalog, logger: Logger, save_folder: str):

self.pd_domain_scores_geo = pd.DataFrame()
self.logger = logger
self.svi_data_shp = gpd.GeoDataFrame()
self.equity_data_shp = gpd.GeoDataFrame()
self.block_groups = gpd.GeoDataFrame()

def set_up_census_key(self, census_key: str):
Expand Down Expand Up @@ -177,12 +178,19 @@ def download_shp_geom(self, year_data: int, counties: List[str]):

block_groups_shp = shp.dissolve(by=attrs, as_index=False)
block_groups_shp = block_groups_shp[attrs + ["geometry"]]
# block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str)
block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str)
block_groups_shp["GEOID_short"] = block_groups_shp["GEO_ID"].str.split("US").str[1]
block_groups_list.append(block_groups_shp)

self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list))

# Delete the shapefile, that is not used anymore
shp_folder = Path(self.save_folder) / "shapefiles"
try:
shutil.rmtree(shp_folder)
except Exception as e:
self.logger.warning(f"Folder {shp_folder} cannot be removed: {e}")

def merge_equity_data_shp(self):
"""Merges the geometry data with the equity_data downloaded"""
self.equity_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left")
Expand All @@ -191,8 +199,14 @@ def merge_equity_data_shp(self):
# Delete the rows that do not have a geometry column
self.equity_data_shp = self.equity_data_shp.loc[self.equity_data_shp["geometry"].notnull()]

#self.svi_data_shp.drop(columns=columns_to_drop, inplace=True)
self.equity_data_shp = self.equity_data_shp.to_crs(epsg=4326)
self.logger.info(
"The geometry information was successfully added to the equity information"
)
)

def get_block_groups(self):
    """Return the census block groups reduced to the short GEOID and geometry columns.

    Returns
    -------
    The ``block_groups`` table with only the ``GEOID_short`` and
    ``geometry`` columns, in that order.
    """
    return self.block_groups[["GEOID_short", "geometry"]]

def clean(self):
    """Drop every column except the identifier and the equity metrics."""
    keep = ["GEOID_short", "TotalPopulationBG", "PerCapitaIncomeBG"]
    self.equity_data_shp = self.equity_data_shp[keep]
11 changes: 11 additions & 0 deletions tests/test_aggregation_areas.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ def test_aggregation_areas(case: ParameterSet | Sequence[object] | object):

# Check if the vulnerability functions exist
assert len(fm.vulnerability.functions) > 0

# Check if the additional_attributes folder exists
assert root.joinpath("additional_attributes").exists()

# Check if the files are copied to the right folder
aggregation_area_fn = _cases[case]["configuration"]["setup_aggregation_areas"]["aggregation_area_fn"]
if isinstance(aggregation_area_fn, Path):
aggregation_area_fn = [aggregation_area_fn]

for a in aggregation_area_fn:
assert root.joinpath("additional_attributes", a.name).exists()
Loading