From 231dcf43ed079851259b27b76ae587d647cf733c Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 13:40:47 +0100 Subject: [PATCH 1/4] Added geopy as dependency --- envs/hydromt-fiat-dev.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/envs/hydromt-fiat-dev.yml b/envs/hydromt-fiat-dev.yml index 61965ec7..d540c690 100644 --- a/envs/hydromt-fiat-dev.yml +++ b/envs/hydromt-fiat-dev.yml @@ -13,6 +13,7 @@ dependencies: - black - gdal>=3.1 - geopandas + - geopy - hydromt=0.8.0 - hydromt_sfincs - numpy diff --git a/pyproject.toml b/pyproject.toml index e7d5c5e6..85874e03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "hydromt==0.8.0", "hydromt_sfincs", "geopandas", + "geopy", "numpy", "pandas", "pyogrio", From 9260aa0119c974d3453a04fd6b87ff20e512e430 Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 14:35:11 +0100 Subject: [PATCH 2/4] added the us states and counties csv --- hydromt_fiat/data/hydromt_fiat_catalog_USA.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml b/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml index 6dea5121..ba698394 100644 --- a/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml +++ b/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml @@ -40,6 +40,13 @@ social_vulnerability: meta: category: social_vulnerability +us_states_counties: + path: social_vulnerability/us_states_counties.csv + data_type: DataFrame + driver: csv + meta: + category: social_vulnerability + default_hazus_iwr_linking: path: vulnerability_linking/default_hazus_iwr_curve_linking.csv data_type: DataFrame From 3900683d911991c740ced6b0c53bab385d660d1d Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 16:03:19 +0100 Subject: [PATCH 3/4] Implemented automatic geolocating, for the state and county of the exposure data --- hydromt_fiat/api/data_types.py | 4 - hydromt_fiat/api/svi_vm.py | 8 +- hydromt_fiat/fiat.py | 133 +++++++++++------- hydromt_fiat/workflows/equity_data.py | 124 ++++++++-------- hydromt_fiat/workflows/exposure_vector.py | 102 +++++++++----- hydromt_fiat/workflows/gis.py | 62 +++++--- .../workflows/social_vulnerability_index.py | 126 +++++++++-------- hydromt_fiat/workflows/utils.py | 9 ++ tests/test_SVI_exposure.py | 5 +- tests/test_equity_data.py | 14 +- tests/test_vulnerability_exposure.py | 3 - 11 files changed, 345 insertions(+), 245 deletions(-) diff --git a/hydromt_fiat/api/data_types.py b/hydromt_fiat/api/data_types.py index 208745f7..f0d4f89b 100644 --- a/hydromt_fiat/api/data_types.py +++ b/hydromt_fiat/api/data_types.py @@ -99,16 +99,12 @@ class AggregationAreaSettings(BaseModel): class SocialVulnerabilityIndexSettings(BaseModel): census_key: str codebook_fn: str - state_abbreviation: str year_data: int - county: str class EquityDataSettings(BaseModel): census_key: str - state_abbreviation: str year_data: int - county: str class ConfigYaml(BaseModel, extra=Extra.allow): diff --git a/hydromt_fiat/api/svi_vm.py b/hydromt_fiat/api/svi_vm.py index c100725b..e03df57e 100644 --- a/hydromt_fiat/api/svi_vm.py +++ b/hydromt_fiat/api/svi_vm.py @@ -26,19 +26,15 @@ def get_state_names(): dict_states = list_of_states() return list(dict_states.keys()) - def set_svi_settings(self, census_key, state_abbreviation, year_data, county): + def set_svi_settings(self, census_key, year_data): self.svi_model = SocialVulnerabilityIndexSettings( census_key = census_key, codebook_fn = "social_vulnerability", - state_abbreviation = state_abbreviation, year_data 
= year_data,
-            county = county,
        )

-    def set_equity_settings(self, census_key, state_abbreviation, year_data, county):
+    def set_equity_settings(self, census_key, year_data):
        self.equity_model = EquityDataSettings(
            census_key = census_key,
-            state_abbreviation = state_abbreviation,
            year_data = year_data,
-            county = county,
        )
diff --git a/hydromt_fiat/fiat.py b/hydromt_fiat/fiat.py
index 46bc1e3f..50c6ef63 100644
--- a/hydromt_fiat/fiat.py
+++ b/hydromt_fiat/fiat.py
@@ -1,9 +1,7 @@
 """Implement fiat model class"""

 import csv
-import glob
 import logging
-from os.path import basename, join
 from pathlib import Path
 from typing import List, Optional, Union

@@ -26,10 +24,12 @@
     create_risk_dataset,
 )
 from hydromt_fiat.workflows.equity_data import EquityData
-from hydromt_fiat.workflows.social_vulnerability_index import SocialVulnerabilityIndex
+from hydromt_fiat.workflows.social_vulnerability_index import SocialVulnerabilityIndex, list_of_states
 from hydromt_fiat.workflows.vulnerability import Vulnerability
 from hydromt_fiat.workflows.aggregation_areas import join_exposure_aggregation_areas
 from hydromt_fiat.workflows.building_footprints import join_exposure_building_footprints
+from hydromt_fiat.workflows.gis import locate_from_bounding_box
+from hydromt_fiat.workflows.utils import get_us_county_numbers

 __all__ = ["FiatModel"]

@@ -567,11 +567,8 @@ def setup_social_vulnerability_index(
         self,
         census_key: str,
         codebook_fn: Union[str, Path],
-        state_abbreviation: str,
-        user_dataset_fn: str = None,
-        blockgroup_fn: str = None,
         year_data: int = None,
-        county: str = None
+        save_all: bool = False,
     ):
         """Setup the social vulnerability index for the vector exposure data for
         Delft-FIAT. This method has so far only been tested with US Census data
@@ -585,43 +582,58 @@
             The user's unique Census key that they got from the census.gov website
             (https://api.census.gov/data/key_signup.html) to be able to download the
             Census data
-        path : Union[str, Path]
+        codebook_fn : Union[str, Path]
             The path to the codebook excel
-        state_abbreviation : str
-            The abbreviation of the US state one would like to use in the analysis
+        year_data : int
+            The year for which the census data should be downloaded: 2020, 2021, or 2022
+        save_all : bool
+            If True, all (normalized) data variables are saved; if False, only the SVI
+            column is added to the FIAT exposure data. By default False.
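+
+        Examples
+        --------
+        A minimal sketch of a call (the census key below is a placeholder, not
+        a real key)::
+
+            fm.setup_social_vulnerability_index(
+                census_key="<your-census-api-key>",
+                codebook_fn="social_vulnerability",
+                year_data=2021,
+            )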
""" - - # Create SVI object - svi = SocialVulnerabilityIndex(self.data_catalog, self.logger) - - # Call functionalities of SVI - # svi.read_dataset(user_dataset_fn) - svi.set_up_census_key(census_key) - svi.variable_code_csv_to_pd_df(codebook_fn) - svi.set_up_download_codes() - svi.set_up_state_code(state_abbreviation) - svi.download_census_data(year_data) - svi.rename_census_data("Census_code_withE", "Census_variable_name") - svi.identify_no_data() - svi.check_nan_variable_columns("Census_variable_name", "Indicator_code") - svi.check_zeroes_variable_rows() - translation_variable_to_indicator = svi.create_indicator_groups( - "Census_variable_name", "Indicator_code" - ) - svi.processing_svi_data(translation_variable_to_indicator) - svi.normalization_svi_data() - svi.domain_scores() - svi.composite_scores() - svi.match_geo_ID() - svi.download_shp_geom(year_data, county) - svi.merge_svi_data_shp() - - # store the relevant tables coming out of the social vulnerability module - self.set_tables(df=svi.svi_data_shp, name="social_vulnerability_scores") - # TODO: Think about adding an indicator for missing data to the svi.svi_data_shp - # Check if the exposure data exists if self.exposure: + # First find the state(s) and county/counties where the exposure data is + # located in + us_states_counties = self.data_catalog.get_dataframe("us_states_counties") + counties, states = locate_from_bounding_box(self.exposure.bounding_box()) + states_dict = list_of_states() + state_abbreviation = [] + for state in states: + try: + state_abbreviation.append(states_dict[state]) + except KeyError: + self.logger.warning(f"State {state} not found.") + + county_numbers = get_us_county_numbers(counties, us_states_counties) + + # Create SVI object + svi = SocialVulnerabilityIndex(self.data_catalog, self.logger) + + # Call functionalities of SVI + svi.set_up_census_key(census_key) + svi.variable_code_csv_to_pd_df(codebook_fn) + svi.set_up_download_codes() + svi.set_up_state_code(state_abbreviation) + svi.download_census_data(year_data) + svi.rename_census_data("Census_code_withE", "Census_variable_name") + svi.identify_no_data() + svi.check_nan_variable_columns("Census_variable_name", "Indicator_code") + svi.check_zeroes_variable_rows() + translation_variable_to_indicator = svi.create_indicator_groups( + "Census_variable_name", "Indicator_code" + ) + svi.processing_svi_data(translation_variable_to_indicator) + svi.normalization_svi_data() + svi.domain_scores() + svi.composite_scores() + svi.match_geo_ID() + svi.download_shp_geom(year_data, county_numbers) + svi.merge_svi_data_shp() + + # store the relevant tables coming out of the social vulnerability module + self.set_tables(df=svi.svi_data_shp, name="social_vulnerability_scores") + # TODO: Think about adding an indicator for missing data to the svi.svi_data_shp + # Link the SVI score to the exposure data exposure_data = self.exposure.get_full_gdf(self.exposure.exposure_db) exposure_data.sort_values("Object ID") @@ -629,18 +641,27 @@ def setup_social_vulnerability_index( if svi.svi_data_shp.crs != exposure_data.crs: svi.svi_data_shp.to_crs(crs=exposure_data.crs, inplace=True) - svi_exp_joined = gpd.sjoin(exposure_data, svi.svi_data_shp, how="left") + if save_all: + # Clean up the columns that are saved + cols_to_save = list(svi.svi_data_shp.columns) + cols_to_save.remove("GEO_ID") + cols_to_save.remove("GEOID_short") + cols_to_save.remove("NAME") + cols_to_save.remove("composite_SVI") + else: + # Only save the composite_svi_z + cols_to_save = ["composite_svi_z", 
"geometry"] + + svi_exp_joined = gpd.sjoin(exposure_data, svi.svi_data_shp[cols_to_save], how="left") svi_exp_joined.drop(columns=["geometry"], inplace=True) svi_exp_joined = pd.DataFrame(svi_exp_joined) + svi_exp_joined.rename(columns={"composite_svi_z": "SVI"}, inplace=True) self.exposure.exposure_db = svi_exp_joined def setup_equity_data( self, census_key: str, - state_abbreviation: str, - blockgroup_fn: str = None, year_data: int = None, - county: str = None ): """Setup the download procedure for equity data similarly to the SVI setup @@ -654,9 +675,20 @@ def setup_equity_data( Census data path : Union[str, Path] The path to the codebook excel - state_abbreviation : str - The abbreviation of the US state one would like to use in the analysis """ + # First find the state(s) and county/counties where the exposure data is + # located in + us_states_counties = self.data_catalog.get_dataframe("us_states_counties") + counties, states = locate_from_bounding_box(self.exposure.bounding_box()) + states_dict = list_of_states() + state_abbreviation = [] + for state in states: + try: + state_abbreviation.append(states_dict[state]) + except KeyError: + self.logger.warning(f"State {state} not found.") + + county_numbers = get_us_county_numbers(counties, us_states_counties) # Create equity object equity = EquityData(self.data_catalog, self.logger) @@ -668,7 +700,7 @@ def setup_equity_data( equity.download_census_data(year_data) equity.rename_census_data() equity.match_geo_ID() - equity.download_shp_geom(year_data, county) + equity.download_shp_geom(year_data, county_numbers) equity.merge_equity_data_shp() self.set_tables(df=equity.equity_data_shp, name="equity_data") @@ -886,8 +918,11 @@ def write_tables(self) -> None: # The default location and save settings of the vulnerability curves fn = "vulnerability/vulnerability_identifiers.csv" kwargs = {"index": False} - elif "social_vulnerability" in name: - fn = f"exposure/{name}.csv" + elif name == "social_vulnerability_scores": + fn = f"exposure/SVI/{name}.csv" + kwargs = {"index": False} + elif name == "equity_data": + fn = f"exposure/equity/{name}.csv" kwargs = {"index": False} # Other, can also return an error or pass silently diff --git a/hydromt_fiat/workflows/equity_data.py b/hydromt_fiat/workflows/equity_data.py index 2a4dc14c..426f8fc4 100644 --- a/hydromt_fiat/workflows/equity_data.py +++ b/hydromt_fiat/workflows/equity_data.py @@ -1,33 +1,32 @@ from census import Census # install in your environment using pip install Census from us import states # install in your environment using pip install us from hydromt.data_catalog import DataCatalog -from hydromt.log import setuplog -import logging +from logging import Logger import pandas as pd -import numpy as np import geopandas as gpd from urllib.request import urlopen from io import BytesIO from zipfile import ZipFile from pathlib import Path - +from typing import List class EquityData: - def __init__(self, data_catalog: DataCatalog = None, logger: logging.Logger = None): + def __init__(self, data_catalog: DataCatalog, logger: Logger): self.data_catalog = data_catalog self.census_key = Census self.download_codes = {} - self.state_fips = 0 + self.state_fips = [] self.pd_census_data = pd.DataFrame() self.codebook = pd.DataFrame() self.indicator_groups = {} self.processed_census_data = pd.DataFrame() self.pd_domain_scores_geo = pd.DataFrame() - self.logger = setuplog("SVI", log_level=10) + self.logger = logger self.svi_data_shp = gpd.GeoDataFrame() self.block_groups = gpd.GeoDataFrame() + 
self.temp_folder = []

     def set_up_census_key(self, census_key: str):
@@ -45,8 +44,7 @@
             f"your census key {census_key} is used to download data from the Census website "
         )

-
-    def set_up_state_code(self, state_abbreviation: str):
+    def set_up_state_code(self, state_abbreviation: List[str]):
         """download census data for a state

         Parameters
@@ -54,12 +52,10 @@
-        state_abbreviation : str
-            Abbreviation of the state for which you want to set up the census data download
+        state_abbreviation : List[str]
+            Abbreviations of the states for which you want to set up the census data download
         """
-        state = [
-            state_abbreviation
-        ]  # read in the state abbreviation as specified in the ini file
-        state_obj = getattr(states, state[0])
-        self.state_fips = state_obj.fips
-        self.logger.info(f"The state abbreviation specified is: {state_abbreviation}")
+        for state in state_abbreviation:
+            self.logger.info(f"The state abbreviation specified is: {state}")
+            state_obj = getattr(states, state)
+            self.state_fips.append(state_obj.fips)

     def variables_to_download(self):
         self.download_variables = ['B01003_001E', 'B19301_001E', 'NAME', 'GEO_ID'] # TODO: later make this a user input?

     def download_census_data(self, year_data):
         """download the census data
         it is possible to also make the county, tract and blockgroup flexible so that a user could specify exactly what to download
-        But: bear in mind, with social vulneraiblity we compare areas against each other, so Ideally you would have a large enough dataset (for statistical and validity purposes)
         """
-        download_census_codes = self.census_key.acs.state_county_blockgroup(
-            fields=self.download_variables,
-            state_fips=self.state_fips,
-            county_fips="*",
-            tract="*",
-            blockgroup="*",
-            year=year_data
-        )
-        self.pd_census_data = pd.DataFrame(download_census_codes)
+        dfs = []
+        for sf in self.state_fips:
+            download_census_codes = self.census_key.acs.state_county_blockgroup(
+                fields=self.download_variables,
+                state_fips=sf,
+                county_fips="*",
+                tract="*",
+                blockgroup="*",
+                year=year_data
+            )
+            dfs.append(pd.DataFrame(download_census_codes))
+
+        self.pd_census_data = pd.concat(dfs)
         self.logger.info(
-            "The equity data was succesfully downloaded from the Census website"
+            "The equity data was successfully downloaded from the Census website"
         )
-        return self.pd_census_data

     def rename_census_data(self):
         """renaming the columns so that they have variable names instead of variable codes as their headers
@@ -136,51 +134,53 @@ def download_and_unzip(self, url, extract_to='.'):
         except Exception as e:
             print(f"Error during download and unzip: {e}")

-    def download_shp_geom(self, year_data, county):
+    def download_shp_geom(self, year_data: int, counties: List[str]):
         """Downloading the shapefiles from the government Tiger website

         Parameters
         ----------
         year_data : int
             The year for which you want to download the census data and the corresponding shapefiles (for geometry)
-        county : int
-            the county code in which your area of interest lies
+        counties : List[str]
+            A list of county codes in which your area of interest lies
         """
-        # Download shapefile of blocks
-        if year_data == 2022:
-            url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{self.state_fips}{county}_faces.zip"
+        block_groups_list = []
+        for sf, county in zip(self.state_fips, counties):
+            # Download shapefile of blocks
+            if year_data == 2022:
+                url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{sf}{county}_faces.zip"
+            elif year_data == 2021:
+                url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{sf}{county}_faces.zip"
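+                # note: the 2020 vintage below sits under TIGER2020PL and, like 2022, has an extra LAYER path segment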
+ elif year_data == 2020: + url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{sf}{county}_faces.zip" + else: + self.logger.warning(f"Year {year_data} not available from 'https://www2.census.gov/geo/tiger'") + return + + # Save shapefiles + folder = f'Shapefiles/{sf}{county}/{year_data}' + self.temp_folder.append(folder) + self.logger.info(f"Downloading the county shapefile for {str(year_data)}") + self.download_and_unzip(url, folder) + shapefiles = list(Path(folder).glob("*.shp")) + if shapefiles: + shp = gpd.read_file(shapefiles[0]) + self.logger.info("The shapefile was downloaded") + else: + print("No shapefile found in the directory.") + + # Dissolve shapefile based on block groups code = "20" - self.logger.info("Downloading the county shapefile for 2022") - elif year_data == 2021: - url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2021") - elif year_data == 2020: - url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2020") - else: - print("year not supported") - return - # Save shapefiles - fold_name = f'Shapefiles/{self.state_fips}{county}/{year_data}' - self.download_and_unzip(url, fold_name) - shapefiles = list(Path(fold_name).glob("*.shp")) - if shapefiles: - self.shp = gpd.read_file(shapefiles[0]) - self.logger.info("The shapefile was downloaded") - else: - print("No shapefile found in the directory.") - - # Dissolve shapefile based on block groups - attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] - attrs = [attr + code for attr in attrs] - - self.block_groups = self.shp.dissolve(by=attrs, as_index=False) - self.block_groups = self.block_groups[attrs + ["geometry"]] - # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) - self.block_groups["GEO_ID"] = "1500000US" + self.block_groups['STATEFP' + code].astype(str) + self.block_groups['COUNTYFP' + code].astype(str) + self.block_groups['TRACTCE' + code].astype(str) + self.block_groups['BLKGRPCE' + code].astype(str) + attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] + attrs = [attr + code for attr in attrs] + + block_groups_shp = shp.dissolve(by=attrs, as_index=False) + block_groups_shp = block_groups_shp[attrs + ["geometry"]] + # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) + block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str) + block_groups_list.append(block_groups_shp) + self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list)) def merge_equity_data_shp(self): """Merges the geometry data with the equity_data downloaded""" diff --git a/hydromt_fiat/workflows/exposure_vector.py b/hydromt_fiat/workflows/exposure_vector.py index 8b98e3ad..b949b82d 100644 --- a/hydromt_fiat/workflows/exposure_vector.py +++ b/hydromt_fiat/workflows/exposure_vector.py @@ -1,5 +1,4 @@ import json -import rasterio import logging from pathlib import Path from typing import Any, List, Optional, Union @@ -33,9 +32,7 @@ ) from hydromt_fiat.workflows.roads import get_max_potential_damage_roads -from rasterio.features import 
rasterize -from xrspatial import zonal_stats -import xarray as xr + class ExposureVector(Exposure): _REQUIRED_COLUMNS = ["Object ID", "Extraction Method", "Ground Floor Height"] @@ -92,6 +89,11 @@ def __init__( self.unit = unit self._geom_names = list() # A list of (original) names of the geometry (files) + def bounding_box(self): + if len(self.exposure_geoms) > 0: + gdf = gpd.GeoDataFrame(pd.concat(self.exposure_geoms, ignore_index=True)) + return gdf.total_bounds + def read_table(self, fn: Union[str, Path]): """Read the Delft-FIAT exposure data. @@ -195,15 +197,21 @@ def setup_buildings_from_single_source( self.setup_extraction_method(extraction_method) # Set the exposure_geoms - self.set_exposure_geoms(gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]], crs=self.crs)) + self.set_exposure_geoms( + gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]], crs=self.crs) + ) # Set the name to the geom_names self.set_geom_names("buildings") # Set the ground floor height if not yet set - #TODO: Check a better way to access to to the geometries, self.empousure_geoms is a list an not a geodataframe + # TODO: Check a better way to access to to the geometries, self.empousure_geoms is a list an not a geodataframe if ground_elevation_file is not None: - self.setup_ground_elevation(ground_elevation_file, self.exposure_db, gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]])) + self.setup_ground_elevation( + ground_elevation_file, + self.exposure_db, + gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]]), + ) # Remove the geometry column from the exposure_db if "geometry" in self.exposure_db: @@ -217,7 +225,9 @@ def setup_roads( ): self.logger.info("Setting up roads...") if str(source).upper() == "OSM": - polygon = self.region["geometry"].values[0] #TODO check if this works each time + polygon = self.region["geometry"].values[ + 0 + ] # TODO check if this works each time roads = get_roads_from_osm(polygon, road_types) if roads.empty: @@ -247,7 +257,9 @@ def setup_roads( # Add the max potential damage and the length of the segments to the roads road_damage = self.data_catalog.get_dataframe(road_damage) - roads[["Max Potential Damage: Structure", "Segment Length [m]"]] = get_max_potential_damage_roads(roads, road_damage) + roads[ + ["Max Potential Damage: Structure", "Segment Length [m]"] + ] = get_max_potential_damage_roads(roads, road_damage) self.set_exposure_geoms(roads[["Object ID", "geometry"]]) self.set_geom_names("roads") @@ -275,7 +287,9 @@ def setup_buildings_from_multiple_sources( self.setup_max_potential_damage(max_potential_damage, damage_types, country) self.setup_ground_floor_height(ground_floor_height) self.setup_extraction_method(extraction_method) - self.setup_ground_elevation(ground_elevation_file, self.exposure_db, self.get_full_gdf(self.exposure_db)) + self.setup_ground_elevation( + ground_elevation_file, self.exposure_db, self.get_full_gdf(self.exposure_db) + ) def setup_asset_locations(self, asset_locations: str) -> None: """Set up the asset locations (points or polygons). @@ -519,7 +533,7 @@ def setup_ground_floor_height( be either 'nearest' (nearest neighbor) or 'intersection'. By default 'nearest'. max_dist : float - The maximum distance for the nearest join measured in meters, by default + The maximum distance for the nearest join measured in meters, by default set to 10 meters. 
""" if ground_floor_height: @@ -535,7 +549,9 @@ def setup_ground_floor_height( # A single file is used to assign the ground floor height to the assets gfh = self.data_catalog.get_geodataframe(ground_floor_height) gdf = self.get_full_gdf(self.exposure_db) - gdf = join_spatial_data(gdf, gfh, attr_name, method, max_dist, self.logger) + gdf = join_spatial_data( + gdf, gfh, attr_name, method, max_dist, self.logger + ) self.exposure_db = self._set_values_from_other_column( gdf, "Ground Floor Height", attr_name ) @@ -549,7 +565,9 @@ def setup_ground_floor_height( def setup_max_potential_damage( self, - max_potential_damage: Union[int, float, str, Path, List[str], List[Path], pd.DataFrame]=None, + max_potential_damage: Union[ + int, float, str, Path, List[str], List[Path], pd.DataFrame + ] = None, damage_types: Union[List[str], str, None] = None, country: Union[str, None] = None, target_attribute: Union[str, List[str], None] = None, @@ -581,23 +599,27 @@ def setup_max_potential_damage( if isinstance(damage_types, str): damage_types = [damage_types] - - if isinstance(max_potential_damage, pd.DataFrame - ): - self.update_max_potential_damage( - updated_max_potential_damages=max_potential_damage - ) + + if isinstance(max_potential_damage, pd.DataFrame): + self.update_max_potential_damage( + updated_max_potential_damages=max_potential_damage + ) elif isinstance(max_potential_damage, int) or isinstance( max_potential_damage, float ): # Set the column(s) to a single value for damage_type in damage_types: - self.exposure_db[f"Max Potential Damage: {damage_type}"] = max_potential_damage + self.exposure_db[ + f"Max Potential Damage: {damage_type}" + ] = max_potential_damage elif isinstance(max_potential_damage, list): # Multiple files are used to assign the ground floor height to the assets NotImplemented - elif max_potential_damage in ["jrc_damage_values", "hazus_max_potential_damages"]: + elif max_potential_damage in [ + "jrc_damage_values", + "hazus_max_potential_damages", + ]: if max_potential_damage == "jrc_damage_values": damage_source = self.data_catalog.get_dataframe(max_potential_damage) if country is None: @@ -611,9 +633,11 @@ def setup_max_potential_damage( elif max_potential_damage == "hazus_max_potential_damages": damage_source = self.data_catalog.get_dataframe(max_potential_damage) damage_values = preprocess_hazus_damage_values(damage_source) - + # Calculate the area of each object - gdf = self.get_full_gdf(self.exposure_db)[["Primary Object Type", "geometry"]] + gdf = self.get_full_gdf(self.exposure_db)[ + ["Primary Object Type", "geometry"] + ] gdf = get_area(gdf) # Set the damage values to the exposure data @@ -623,7 +647,8 @@ def setup_max_potential_damage( self.exposure_db[ f"Max Potential Damage: {damage_type.capitalize()}" ] = [ - damage_values[building_type][damage_type.lower()] * square_meters + damage_values[building_type][damage_type.lower()] + * square_meters for building_type, square_meters in zip( gdf["Primary Object Type"], gdf["area"] ) @@ -637,7 +662,7 @@ def setup_max_potential_damage( max_potential_damage, Path ): # When the max_potential_damage is a string but not jrc_damage_values - # or hazus_max_potential_damages. Here, a single file is used to + # or hazus_max_potential_damages. 
Here, a single file is used to # assign the ground floor height to the assets gfh = self.data_catalog.get_geodataframe(max_potential_damage) gdf = self.get_full_gdf(self.exposure_db) @@ -652,17 +677,18 @@ def setup_ground_elevation( exposure_db: pd.DataFrame, exposure_geoms: gpd.GeoDataFrame, ) -> None: - if ground_elevation: ground_elevation_from_dem( ground_elevation=ground_elevation, exposure_db=exposure_db, exposure_geoms=exposure_geoms, ) - + else: - print('Ground elevation is not recognized by the setup_ground_elevation function\n Ground elevation will be set to 0') - exposure_db['Ground Elevation'] = 0 + print( + "Ground elevation is not recognized by the setup_ground_elevation function\n Ground elevation will be set to 0" + ) + exposure_db["Ground Elevation"] = 0 def update_max_potential_damage( self, updated_max_potential_damages: pd.DataFrame @@ -884,7 +910,9 @@ def setup_new_composite_areas( elevation_reference: str, path_ref: str = None, attr_ref: str = None, - ground_elevation: Union[int, float, None, str, Path, List[str], List[Path]]=None, + ground_elevation: Union[ + int, float, None, str, Path, List[str], List[Path] + ] = None, ) -> None: """Adds one or multiple (polygon) areas to the exposure database with a composite damage function and a percentage of the total damage. @@ -924,7 +952,9 @@ def setup_new_composite_areas( percent_growth = float(percent_growth) / 100 geom_file = Path(geom_file) - assert geom_file.is_file(), f"File {str(geom_file)} is missing, cannot set up a new composite area." + assert ( + geom_file.is_file() + ), f"File {str(geom_file)} is missing, cannot set up a new composite area." # Calculate the total damages for the new object, for the indicated damage types new_object_damages = self.calculate_damages_new_exposure_object( @@ -1047,8 +1077,8 @@ def setup_new_composite_areas( ) # Adding elevation data into the new objects - self.setup_ground_elevation(ground_elevation,new_objects,_new_exposure_geoms) - + self.setup_ground_elevation(ground_elevation, new_objects, _new_exposure_geoms) + # Update the exposure_db self.exposure_db = pd.concat([self.exposure_db, new_objects]).reset_index( drop=True @@ -1073,7 +1103,9 @@ def link_exposure_vulnerability( linking_per_damage_type = exposure_linking_table.loc[ exposure_linking_table["Damage Type"] == damage_type, : ] - assert not linking_per_damage_type.empty, f"Damage type {damage_type} not found in the exposure-vulnerability linking table" + assert ( + not linking_per_damage_type.empty + ), f"Damage type {damage_type} not found in the exposure-vulnerability linking table" # Create a dictionary that links the exposure data to the vulnerability data linking_dict = dict( @@ -1473,4 +1505,4 @@ def _set_values_from_other_column( df[col_to_copy].notna(), col_to_copy ] del df[col_to_copy] - return df \ No newline at end of file + return df diff --git a/hydromt_fiat/workflows/gis.py b/hydromt_fiat/workflows/gis.py index 3411bfab..2ccaefb6 100644 --- a/hydromt_fiat/workflows/gis.py +++ b/hydromt_fiat/workflows/gis.py @@ -9,6 +9,7 @@ import numpy as np import xarray as xr from pathlib import Path +from geopy.geocoders import Nominatim def get_area(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -197,9 +198,11 @@ def join_spatial_data( ) except AssertionError as e: logger.warning(e) - logger.warning("Reprojecting the GeoDataFrame from " - f"{get_crs_str_from_gdf(right_gdf.crs)} to " - f"{get_crs_str_from_gdf(left_gdf.crs)}.") + logger.warning( + "Reprojecting the GeoDataFrame from " + 
f"{get_crs_str_from_gdf(right_gdf.crs)} to " + f"{get_crs_str_from_gdf(left_gdf.crs)}." + ) right_gdf = right_gdf.to_crs(left_gdf.crs) left_gdf_type = check_geometry_type(left_gdf) @@ -243,41 +246,62 @@ def ground_elevation_from_dem( exposure_db: pd.DataFrame, exposure_geoms: gpd.GeoDataFrame, ) -> None: - # This function was developed by Willem https://github.com/interTwin-eu/DT-flood/blob/DemonstrationNotebooks/Notebooks/SetupFIAT.ipynb - #TODO: Find equivalent functions in hydromt + # TODO: Find equivalent functions in hydromt # Read in the DEM dem = rasterio.open(ground_elevation) - # gdf = self.get_full_gdf(exposure_db) + # gdf = self.get_full_gdf(exposure_db) gdf = exposure_geoms.to_crs(dem.crs.data) # Create a list of geometries plus a label for rasterize # The labels start at 1 since the label 0 is reserved for everything not in a geometry # The order of each tuple is (geometry,label) - shapes = list(enumerate(gdf['geometry'].values)) - shapes = [(t[1],t[0]+1) for t in shapes] + shapes = list(enumerate(gdf["geometry"].values)) + shapes = [(t[1], t[0] + 1) for t in shapes] rasterized = rasterize( - shapes=shapes, - out_shape=dem.shape, - transform=dem.transform, - all_touched=True + shapes=shapes, out_shape=dem.shape, transform=dem.transform, all_touched=True ) # zonal_stats needs xarrays as input rasterized = xr.DataArray(rasterized) # Calculate the zonal statistics - zonal_out = zonal_stats(rasterized,xr.DataArray(dem.read(1)), - stats_funcs=['mean'], - nodata_values=np.nan) + zonal_out = zonal_stats( + rasterized, + xr.DataArray(dem.read(1)), + stats_funcs=["mean"], + nodata_values=np.nan, + ) # The zero label is for pixels not in a geometry so we discard them zonal_out = zonal_out.drop(0) # Array storing the zonal means # Store the calculated means at index corresponding to their label - zonal_means = np.full(len(shapes),np.nan) - zonal_means[[zonal_out['zone'].values-1]] = zonal_out['mean'].values + zonal_means = np.full(len(shapes), np.nan) + zonal_means[[zonal_out["zone"].values - 1]] = zonal_out["mean"].values # # Add Ground Elevation column and get rid of nans in the appropriate way - exposure_db['Ground Elevation'] = zonal_means - exposure_db['Ground Elevation'].bfill(inplace=True) + exposure_db["Ground Elevation"] = zonal_means + exposure_db["Ground Elevation"].bfill(inplace=True) + + +def locate_from_bounding_box(bounding_box): + geolocator = Nominatim(user_agent="hydromt-fiat") + + search = [ + (bounding_box[1], bounding_box[0]), + (bounding_box[1], bounding_box[2]), + (bounding_box[3], bounding_box[0]), + (bounding_box[3], bounding_box[2]), + ] + locations = [geolocator.reverse(s) for s in search] + locations_list = [location[0].split(", ") for location in locations] + locations_list_no_numbers = [[y for y in x if not y.isnumeric()] for x in locations_list] + counties = [y for x in locations_list for y in x if "county" in y.lower()] + states = [x[-2] for x in locations_list_no_numbers] + + counties_states_combo = set(list(zip(counties, states))) + counties = [c[0] for c in counties_states_combo] + states = [c[1] for c in counties_states_combo] + + return counties, states diff --git a/hydromt_fiat/workflows/social_vulnerability_index.py b/hydromt_fiat/workflows/social_vulnerability_index.py index de68b9ca..c838007b 100644 --- a/hydromt_fiat/workflows/social_vulnerability_index.py +++ b/hydromt_fiat/workflows/social_vulnerability_index.py @@ -1,8 +1,7 @@ from census import Census # install in your environment using pip install Census from us import states # install in 
your environment using pip install us from hydromt.data_catalog import DataCatalog -from hydromt.log import setuplog -import logging +from logging import Logger import pandas as pd import numpy as np import geopandas as gpd @@ -10,6 +9,7 @@ from io import BytesIO from zipfile import ZipFile from pathlib import Path +from typing import List def list_of_states(): states = { @@ -71,11 +71,11 @@ def list_of_states(): return states_inverted class SocialVulnerabilityIndex: - def __init__(self, data_catalog: DataCatalog = None, logger: logging.Logger = None): + def __init__(self, data_catalog: DataCatalog, logger: Logger): self.data_catalog = data_catalog self.census_key = Census self.download_codes = {} - self.state_fips = 0 + self.state_fips = [] self.pd_census_data = pd.DataFrame() self.codebook = pd.DataFrame() self.indicator_groups = {} @@ -87,9 +87,10 @@ def __init__(self, data_catalog: DataCatalog = None, logger: logging.Logger = No self.pd_domain_scores = pd.DataFrame() self.pd_domain_scores_z = pd.DataFrame() self.pd_domain_scores_geo = pd.DataFrame() - self.logger = setuplog("SVI", log_level=10) + self.logger = logger self.svi_data_shp = gpd.GeoDataFrame() self.block_groups = gpd.GeoDataFrame() + self.temp_folder = [] def read_dataset(self, path: str): """If you have your own dataset (e.g. already downloaded census data), you can use this to load it from a csv @@ -132,7 +133,7 @@ def set_up_download_codes(self): self.download_codes = list(self.codebook["Census_code_withE"]) self.download_codes.append("NAME") - def set_up_state_code(self, state_abbreviation: str): + def set_up_state_code(self, state_abbreviation: List[str]): """download census data for a state Parameters @@ -140,31 +141,33 @@ def set_up_state_code(self, state_abbreviation: str): state_abbreviation : str Abbreviation of the state for which you want to set up the census data download """ - state = [ - state_abbreviation - ] # read in the state abbreviation as specified in the ini file - state_obj = getattr(states, state[0]) - self.state_fips = state_obj.fips - self.logger.info(f"The state abbreviation specified is: {state_abbreviation}") + for state in state_abbreviation: + self.logger.info(f"The state abbreviation specified is: {state}") + state_obj = getattr(states, state) + self.state_fips.append(state_obj.fips) + def download_census_data(self, year_data): """download the census data it is possible to also make the county, tract and blockgroup flexible so that a user could specify exactly what to download But: bear in mind, with social vulneraiblity we compare areas against each other, so Ideally you would have a large enough dataset (for statistical and validity purposes) """ - download_census_codes = self.census_key.acs.state_county_blockgroup( - fields=self.download_codes, - state_fips=self.state_fips, - county_fips="*", - tract="*", - blockgroup="*", - year=year_data - ) - self.pd_census_data = pd.DataFrame(download_census_codes) + dfs = [] + for sf in self.state_fips: + download_census_codes = self.census_key.acs.state_county_blockgroup( + fields=self.download_codes, + state_fips=sf, + county_fips="*", + tract="*", + blockgroup="*", + year=year_data + ) + dfs.append(pd.DataFrame(download_census_codes)) + + self.pd_census_data = pd.concat(dfs) self.logger.info( "The census data was successfully downloaded using the specified variables in the codebook Excel" ) - return self.pd_census_data def rename_census_data(self, from_name: str, to_name: str): """renaming the columns so that they have variable names instead of 
variable codes as their headers @@ -441,7 +444,7 @@ def normalization_svi_data(self): ) svi_fiat_names.append(column + "_norm") else: - print("Direction is not provided") + self.logger.warning(f"Normalization direction for {column} is not provided") self.logger.info( "The data is standardized according to their relation specified in the codebook Excel (inverse or normal z-scores)" ) @@ -549,52 +552,55 @@ def download_and_unzip(self, url, extract_to='.'): zipfile = ZipFile(BytesIO(http_response.read())) zipfile.extractall(path=extract_to) except Exception as e: - print(f"Error during download and unzip: {e}") + self.logger.warning(f"Error during download and unzip: {e}") - def download_shp_geom(self, year_data, county): + def download_shp_geom(self, year_data: int, counties: List[str]): """Downloading the shapefiles from the government Tiger website Parameters ---------- year_data : int The year for which you want to download the census data and the corresponding shapefiles (for geometry) - county : int - the county code in which your area of interest lies + counties : List[str] + A list of county codes in which your area of interest lies """ - # Download shapefile of blocks - if year_data == 2022: - url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2022") - elif year_data == 2021: - url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2021") - elif year_data == 2020: - url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{self.state_fips}{county}_faces.zip" + block_groups_list = [] + for sf, county in zip(self.state_fips, counties): + # Download shapefile of blocks + if year_data == 2022: + url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{sf}{county}_faces.zip" + elif year_data == 2021: + url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{sf}{county}_faces.zip" + elif year_data == 2020: + url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{sf}{county}_faces.zip" + else: + self.logger.warning(f"Year {year_data} not available from 'https://www2.census.gov/geo/tiger'") + return + + # Save shapefiles + folder = f'Shapefiles/{sf}{county}/{year_data}' + self.temp_folder.append(folder) + self.logger.info(f"Downloading the county shapefile for {str(year_data)}") + self.download_and_unzip(url, folder) + shapefiles = list(Path(folder).glob("*.shp")) + if shapefiles: + shp = gpd.read_file(shapefiles[0]) + self.logger.info("The shapefile was downloaded") + else: + self.logger.warning("No county shapefile found in the directory.") + + # Dissolve shapefile based on block groups code = "20" - self.logger.info("Downloading the county shapefile for 2020") - else: - print("year not supported") - return - # Save shapefiles - fold_name = f'Shapefiles/{self.state_fips}{county}/{year_data}' - self.download_and_unzip(url, fold_name) - shapefiles = list(Path(fold_name).glob("*.shp")) - if shapefiles: - self.shp = gpd.read_file(shapefiles[0]) - self.logger.info("The shapefile was downloaded") - else: - print("No shapefile found in the directory.") - - # Dissolve shapefile based on block groups - attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] - attrs = [attr + code for attr in attrs] - - self.block_groups = self.shp.dissolve(by=attrs, as_index=False) - self.block_groups = 
self.block_groups[attrs + ["geometry"]] - # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) - self.block_groups["GEO_ID"] = "1500000US" + self.block_groups['STATEFP' + code].astype(str) + self.block_groups['COUNTYFP' + code].astype(str) + self.block_groups['TRACTCE' + code].astype(str) + self.block_groups['BLKGRPCE' + code].astype(str) + attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] + attrs = [attr + code for attr in attrs] + + block_groups_shp = shp.dissolve(by=attrs, as_index=False) + block_groups_shp = block_groups_shp[attrs + ["geometry"]] + # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) + block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str) + block_groups_list.append(block_groups_shp) + + self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list)) def merge_svi_data_shp(self): diff --git a/hydromt_fiat/workflows/utils.py b/hydromt_fiat/workflows/utils.py index 69d9d45a..c6c4511e 100644 --- a/hydromt_fiat/workflows/utils.py +++ b/hydromt_fiat/workflows/utils.py @@ -1,3 +1,6 @@ +import pandas as pd + + def detect_delimiter(csvFile): """From stackoverflow https://stackoverflow.com/questions/16312104/can-i-import-a-csv-file-and-automatically-infer-the-delimiter @@ -10,3 +13,9 @@ def detect_delimiter(csvFile): return "," # default delimiter return "," + + +def get_us_county_numbers(county_names: list, states_counties_table: pd.DataFrame) -> list: + county_numbers = list(states_counties_table.loc[states_counties_table["COUNTYNAME"].isin(county_names), "COUNTYFP"].values) + county_numbers = [str(cn).zfill(3) for cn in county_numbers] + return county_numbers \ No newline at end of file diff --git a/tests/test_SVI_exposure.py b/tests/test_SVI_exposure.py index 70c4f87a..2f0c9b82 100644 --- a/tests/test_SVI_exposure.py +++ b/tests/test_SVI_exposure.py @@ -61,10 +61,7 @@ "setup_social_vulnerability_index": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", "codebook_fn": "social_vulnerability", - "state_abbreviation": "SC", - "blockgroup_fn": "blockgroup_shp_data", "year_data": 2021, - "county": "019" }, }, } @@ -92,7 +89,7 @@ def test_SVI_exposure(case): assert root.joinpath("exposure", "region.gpkg").exists() # Check if the social vulnerability data exists - assert root.joinpath("exposure", "social_vulnerability_scores.csv").exists() + assert root.joinpath("exposure", "SVI", "social_vulnerability_scores.csv").exists() # Check if the vulnerability data exists assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists() \ No newline at end of file diff --git a/tests/test_equity_data.py b/tests/test_equity_data.py index 25395c9e..6ffee54d 100644 --- a/tests/test_equity_data.py +++ b/tests/test_equity_data.py @@ -24,9 +24,7 @@ "setup_equity_data": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", - "state_abbreviation": "SC", "year_data": 2021, - "county": "019" }, }, } @@ -43,6 +41,16 @@ def test_equity_data(case): data_libs = EXAMPLEDIR.joinpath(_cases[case]["data_catalogue"]) fm = FiatModel(root=root, mode="w", data_libs=data_libs, logger=logger) - fm.build(opt=_cases[case]["configuration"]) fm.write() + + # Check if the exposure data exists + assert root.joinpath("exposure", "buildings.gpkg").exists() + 
assert root.joinpath("exposure", "exposure.csv").exists() + assert root.joinpath("exposure", "region.gpkg").exists() + + # Check if the equity data exists + assert root.joinpath("exposure", "equity", "equity_data.csv").exists() + + # Check if the vulnerability data exists + assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists() \ No newline at end of file diff --git a/tests/test_vulnerability_exposure.py b/tests/test_vulnerability_exposure.py index 558acc98..e8ec88d4 100644 --- a/tests/test_vulnerability_exposure.py +++ b/tests/test_vulnerability_exposure.py @@ -91,10 +91,7 @@ "setup_social_vulnerability_index": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", "codebook_fn": "social_vulnerability", - "state_abbreviation": "SC", - "blockgroup_fn": "blockgroup_shp_data", "year_data": 2021, - "county": "019" }, }, "region": _region, From 92e27cc191d6e5f74dbacc6fed3fbc6506c06d7a Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 16:17:56 +0100 Subject: [PATCH 4/4] Updating tests, cleaning data, adding different folders in the output --- hydromt_fiat/fiat.py | 2 +- hydromt_fiat/workflows/equity_data.py | 3 ++ .../workflows/social_vulnerability_index.py | 3 ++ tests/test_equity_data.py | 44 +++++++++++++++++-- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/hydromt_fiat/fiat.py b/hydromt_fiat/fiat.py index 50c6ef63..6ee2104a 100644 --- a/hydromt_fiat/fiat.py +++ b/hydromt_fiat/fiat.py @@ -922,7 +922,7 @@ def write_tables(self) -> None: fn = f"exposure/SVI/{name}.csv" kwargs = {"index": False} elif name == "equity_data": - fn = f"exposure/equity/{name}.csv" + fn = f"equity/{name}.csv" kwargs = {"index": False} # Other, can also return an error or pass silently diff --git a/hydromt_fiat/workflows/equity_data.py b/hydromt_fiat/workflows/equity_data.py index 426f8fc4..236ab642 100644 --- a/hydromt_fiat/workflows/equity_data.py +++ b/hydromt_fiat/workflows/equity_data.py @@ -187,6 +187,9 @@ def merge_equity_data_shp(self): self.equity_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left") self.equity_data_shp = gpd.GeoDataFrame(self.equity_data_shp) + # Delete the rows that do not have a geometry column + self.equity_data_shp = self.equity_data_shp.loc[self.equity_data_shp["geometry"].notnull()] + #self.svi_data_shp.drop(columns=columns_to_drop, inplace=True) self.equity_data_shp = self.equity_data_shp.to_crs(epsg=4326) self.logger.info( diff --git a/hydromt_fiat/workflows/social_vulnerability_index.py b/hydromt_fiat/workflows/social_vulnerability_index.py index c838007b..4713040d 100644 --- a/hydromt_fiat/workflows/social_vulnerability_index.py +++ b/hydromt_fiat/workflows/social_vulnerability_index.py @@ -608,6 +608,9 @@ def merge_svi_data_shp(self): self.svi_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left") self.svi_data_shp = gpd.GeoDataFrame(self.svi_data_shp) + # Delete the rows that do not have a geometry column + self.svi_data_shp = self.svi_data_shp.loc[self.svi_data_shp["geometry"].notnull()] + #self.svi_data_shp.drop(columns=columns_to_drop, inplace=True) self.svi_data_shp = self.svi_data_shp.to_crs(epsg=4326) self.logger.info( diff --git a/tests/test_equity_data.py b/tests/test_equity_data.py index 6ffee54d..005cac71 100644 --- a/tests/test_equity_data.py +++ b/tests/test_equity_data.py @@ -10,10 +10,33 @@ ) DATADIR = Path().absolute() / "hydromt_fiat" / "data" +_region = { + "type": "FeatureCollection", + 
"features": [ + { + "type": "Feature", + "properties": {}, + "geometry": { + "coordinates": [ + [ + [-80.0808, 32.7005], + [-79.8756, 32.8561], + [-79.8756, 32.7005], + [-80.0808, 32.8561], + [-80.0808, 32.7005], + ] + ], + "type": "Polygon", + }, + } + ], +} + _cases = { "Test_equity_data": { "data_catalogue": DATADIR / "hydromt_fiat_catalog_USA.yml", "folder": "Test_equity_data", + "region": _region, "configuration": { "setup_global_settings": {"crs": "epsg:4326"}, "setup_output": { @@ -21,7 +44,20 @@ "output_csv_name": "output.csv", "output_vector_name": "spatial.gpkg", }, - + "setup_vulnerability": { + "vulnerability_fn": "default_vulnerability_curves", + "vulnerability_identifiers_and_linking_fn": "default_hazus_iwr_linking", + "functions_mean": "default", + "functions_max": ["AGR1"], + "unit": "feet", + }, + "setup_exposure_buildings": { + "asset_locations": "NSI", + "occupancy_type": "NSI", + "max_potential_damage": "NSI", + "ground_floor_height": "NSI", + "unit": "ft", + }, "setup_equity_data": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", "year_data": 2021, @@ -30,6 +66,7 @@ } } + @pytest.mark.parametrize("case", list(_cases.keys())) # @pytest.mark.skip(reason="Needs to be updated") def test_equity_data(case): @@ -41,7 +78,8 @@ def test_equity_data(case): data_libs = EXAMPLEDIR.joinpath(_cases[case]["data_catalogue"]) fm = FiatModel(root=root, mode="w", data_libs=data_libs, logger=logger) - fm.build(opt=_cases[case]["configuration"]) + region = gpd.GeoDataFrame.from_features(_cases[case]["region"], crs=4326) + fm.build(region={"geom": region}, opt=_cases[case]["configuration"]) fm.write() # Check if the exposure data exists @@ -53,4 +91,4 @@ def test_equity_data(case): assert root.joinpath("exposure", "equity", "equity_data.csv").exists() # Check if the vulnerability data exists - assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists() \ No newline at end of file + assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists()