Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Saving data in folders fixes #233

Merged
merged 4 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
"Ground Floor Height": "found_ht",
"Ground Elevation": "ground_elv",
"geometry": "geometry",
"Aggregation Label: Census Block": "cbfips"
"Aggregation Label: Census Blockgroup": "cbfips"
}
95 changes: 64 additions & 31 deletions hydromt_fiat/fiat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pandas as pd
from hydromt.models.model_grid import GridModel
from shapely.geometry import box
import os
import os
import shutil

from hydromt_fiat import DATADIR
Expand Down Expand Up @@ -71,6 +71,10 @@ def __init__(
self.exposure = None
self.vulnerability = None
self.vf_ids_and_linking_df = pd.DataFrame()
self.additional_attributes_fn = (
"" # Path or paths to the additional attributes dataset(s)
)
self.building_footprint_fn = "" # Path to the building footprints dataset

def setup_global_settings(
self,
Expand Down Expand Up @@ -103,7 +107,6 @@ def setup_output(
output_dir: str = "output",
output_csv_name: str = "output.csv",
output_vector_name: Union[str, List[str]] = "spatial.gpkg",

) -> None:
"""Setup Delft-FIAT output folder and files.

Expand Down Expand Up @@ -427,7 +430,7 @@ def update_max_potential_damage(
method=method,
max_dist=max_dist,
)

def update_ground_elevation(
self,
ground_elevation: Union[int, float, None, str, Path],
Expand Down Expand Up @@ -503,7 +506,9 @@ def setup_hazard(
# read maps and retrieve their attributes
da_map_fn, da_name, da_type = read_maps(params, da_map_fn, idx)

da = self.data_catalog.get_rasterdataset(da_map_fn) # removed geom=self.region because it is not always there
da = self.data_catalog.get_rasterdataset(
da_map_fn
) # removed geom=self.region because it is not always there

# Convert to units of the exposure data if required
if (
Expand Down Expand Up @@ -755,47 +760,53 @@ def setup_equity_data(
equity.match_geo_ID()
equity.download_shp_geom(year_data, county_numbers)
equity.merge_equity_data_shp()
equity.clean()

self.set_tables(df=equity.equity_data_shp, name="equity_data")

# Save the census block aggregation area data
block_groups = equity.get_block_groups()
self.set_geoms(block_groups, "aggregation_areas/block_groups")

# Update the aggregation label: census block
del self.exposure.exposure_db["Aggregation Label: Census Blockgroup"]
self.setup_aggregation_areas(
aggregation_area_fn=block_groups,
attribute_names="GEOID_short",
label_names="Aggregation Label: Census Blockgroup",
)

def setup_aggregation_areas(
self,
aggregation_area_fn: Union[List[str], List[Path], str, Path],
aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame],
attribute_names: Union[List[str], str],
label_names: Union[List[str], str],
):
"""Join aggregation-area attributes onto the exposure data.

NOTE(review): this span is diff residue from a scraped pull-request page —
both the pre- and post-change signature lines and bodies appear
interleaved; only one of each exists in the merged file.

Parameters
----------
aggregation_area_fn : Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame]
Path(s) to, or in-memory GeoDataFrame(s) of, the aggregation area(s).
attribute_names : Union[List[str], str]
Name of the attribute(s) to join from the aggregation area data.
label_names : Union[List[str], str]
The name(s) that the new attribute(s) will get in the exposure data.
"""

# Join the aggregation attributes into the exposure database.
exposure_gdf = self.exposure.get_full_gdf(self.exposure.exposure_db)
self.exposure.exposure_db = join_exposure_aggregation_areas(
exposure_gdf, aggregation_area_fn, attribute_names, label_names
)

# Create additional attributes folder in root
additional_att_input = Path(self.root).joinpath("additional_attributes")
if not os.path.exists(additional_att_input):
os.makedirs(additional_att_input)

if isinstance(aggregation_area_fn,list):
for file in aggregation_area_fn:
shutil.copy2(file, additional_att_input)
else:
shutil.copy2(aggregation_area_fn, additional_att_input)

# Record the input so write() can copy it into the model root later;
# in-memory GeoDataFrames are skipped because only user-supplied files
# should be copied (not e.g. the generated census block groups).
if isinstance(aggregation_area_fn, list):
the_type = type(aggregation_area_fn[0])
else:
the_type = type(aggregation_area_fn)
if the_type != gpd.GeoDataFrame:
# This copies data from one location to the root folder for the FIAT
# model, only use user-input data here (not the census blocks)
self.additional_attributes_fn = aggregation_area_fn

def setup_building_footprint(
self,
Expand All @@ -822,15 +833,9 @@ def setup_building_footprint(
building_footprint_fn,
attribute_name,
)
# Create BF folder in Exposure
building_footprints_exp = Path(self.root).joinpath("exposure" , "building_footprints")
if not os.path.exists(building_footprints_exp):
os.makedirs(building_footprints_exp)
if isinstance(building_footprint_fn,list):
for file in building_footprint_fn:
shutil.copy2(file, building_footprints_exp)
else:
shutil.copy2(building_footprint_fn, building_footprints_exp)

# Set the building_footprint_fn property to save the building footprints
self.building_footprint_fn = building_footprint_fn

# Update functions
def update_all(self):
Expand Down Expand Up @@ -959,6 +964,34 @@ def write(self):
self.write_geoms(fn="exposure/{name}.gpkg", driver="GPKG")
if self._tables:
self.write_tables()
if self.additional_attributes_fn:
folder = Path(self.root).joinpath("additional_attributes")
self.copy_datasets(self.additional_attributes_fn, folder)
if self.building_footprint_fn:
folder = Path(self.root).joinpath("exposure", "building_footprints")
self.copy_datasets(self.building_footprint_fn, folder)

def copy_datasets(
    self, data: Union[list, str, Path], folder: Union[Path, str]
) -> None:
    """Copy one or more dataset files into ``folder``.

    Parameters
    ----------
    data : Union[list, str, Path]
        A single path, or a list of paths, to the file(s) to copy.
    folder : Union[Path, str]
        Destination folder; created if it does not yet exist.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(folder, exist_ok=True)

    if isinstance(data, list):
        for file in data:
            shutil.copy2(file, folder)
    elif isinstance(data, (str, Path)):
        # Single dataset; anything else (e.g. an in-memory GeoDataFrame)
        # is intentionally not copied.
        shutil.copy2(data, folder)

def write_tables(self) -> None:
if len(self._tables) == 0:
Expand Down
13 changes: 8 additions & 5 deletions hydromt_fiat/workflows/aggregation_areas.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def process_value(value):

def join_exposure_aggregation_multiple_areas(
exposure_gdf: gpd.GeoDataFrame,
aggregation_area_fn: Union[List[str], List[Path]],
aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame]],
attribute_names: List[str],
label_names: List[str],
) -> gpd.GeoDataFrame:
Expand All @@ -35,7 +35,10 @@ def join_exposure_aggregation_multiple_areas(
_description_
"""
for file_path, attribute_name, label_name in zip(aggregation_area_fn, attribute_names, label_names):
aggregation_gdf = gpd.read_file(file_path)
if isinstance(file_path, str) or isinstance(file_path, Path):
aggregation_gdf = gpd.read_file(file_path)
else:
aggregation_gdf = file_path

## check the projection of both gdf and if not match, reproject
if exposure_gdf.crs != aggregation_gdf.crs:
Expand All @@ -50,7 +53,7 @@ def join_exposure_aggregation_multiple_areas(
exposure_gdf = gpd.sjoin(
exposure_gdf,
aggregation_gdf[["geometry", attribute_name]],
op="intersects",
predicate="intersects",
how="left",
)

Expand All @@ -69,7 +72,7 @@ def join_exposure_aggregation_multiple_areas(

def join_exposure_aggregation_areas(
exposure_gdf: gpd.GeoDataFrame,
aggregation_area_fn: Union[List[str], List[Path], str, Path],
aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame],
attribute_names: Union[List[str], str],
label_names: Union[List[str], str],
) -> gpd.GeoDataFrame:
Expand All @@ -86,7 +89,7 @@ def join_exposure_aggregation_areas(
label_names : Union[List[str], str]
Name of the label(s) to join.
"""
if isinstance(aggregation_area_fn, str) or isinstance(aggregation_area_fn, Path):
if isinstance(aggregation_area_fn, str) or isinstance(aggregation_area_fn, Path) or isinstance(aggregation_area_fn, gpd.GeoDataFrame):
aggregation_area_fn = [aggregation_area_fn]
if isinstance(attribute_names, str):
attribute_names = [attribute_names]
Expand Down
22 changes: 18 additions & 4 deletions hydromt_fiat/workflows/equity_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from zipfile import ZipFile
from pathlib import Path
from typing import List
import shutil


class EquityData:
Expand All @@ -25,7 +26,7 @@ def __init__(self, data_catalog: DataCatalog, logger: Logger, save_folder: str):

self.pd_domain_scores_geo = pd.DataFrame()
self.logger = logger
self.svi_data_shp = gpd.GeoDataFrame()
self.equity_data_shp = gpd.GeoDataFrame()
self.block_groups = gpd.GeoDataFrame()

def set_up_census_key(self, census_key: str):
Expand Down Expand Up @@ -177,12 +178,19 @@ def download_shp_geom(self, year_data: int, counties: List[str]):

block_groups_shp = shp.dissolve(by=attrs, as_index=False)
block_groups_shp = block_groups_shp[attrs + ["geometry"]]
# block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str)
block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str)
block_groups_shp["GEOID_short"] = block_groups_shp["GEO_ID"].str.split("US").str[1]
block_groups_list.append(block_groups_shp)

self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list))

# Delete the shapefile, that is not used anymore
shp_folder = Path(self.save_folder) / "shapefiles"
try:
shutil.rmtree(shp_folder)
except Exception as e:
self.logger.warning(f"Folder {shp_folder} cannot be removed: {e}")

def merge_equity_data_shp(self):
"""Merges the geometry data with the equity_data downloaded"""
self.equity_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left")
Expand All @@ -191,8 +199,14 @@ def merge_equity_data_shp(self):
# Delete the rows that do not have a geometry column
self.equity_data_shp = self.equity_data_shp.loc[self.equity_data_shp["geometry"].notnull()]

#self.svi_data_shp.drop(columns=columns_to_drop, inplace=True)
self.equity_data_shp = self.equity_data_shp.to_crs(epsg=4326)
self.logger.info(
"The geometry information was successfully added to the equity information"
)
)

def get_block_groups(self):
    """Return the census block groups reduced to the short GEOID and geometry columns.

    Returns
    -------
    The ``block_groups`` table with only the ``GEOID_short`` and
    ``geometry`` columns, in that order.
    """
    return self.block_groups[["GEOID_short", "geometry"]]

def clean(self):
    """Drop every column except the identifier and the equity metrics."""
    keep = ["GEOID_short", "TotalPopulationBG", "PerCapitaIncomeBG"]
    self.equity_data_shp = self.equity_data_shp[keep]
11 changes: 11 additions & 0 deletions tests/test_aggregation_areas.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ def test_aggregation_areas(case: ParameterSet | Sequence[object] | object):

# Check if the vulnerability functions exist
assert len(fm.vulnerability.functions) > 0

# Check if the additional_attributes folder exists
assert root.joinpath("additional_attributes").exists()

# Check if the files are copied to the right folder
aggregation_area_fn = _cases[case]["configuration"]["setup_aggregation_areas"]["aggregation_area_fn"]
if isinstance(aggregation_area_fn, Path):
aggregation_area_fn = [aggregation_area_fn]

for a in aggregation_area_fn:
assert root.joinpath("additional_attributes", a.name).exists()
Loading