From 231dcf43ed079851259b27b76ae587d647cf733c Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 13:40:47 +0100 Subject: [PATCH 1/4] Added geopy as dependency --- envs/hydromt-fiat-dev.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/envs/hydromt-fiat-dev.yml b/envs/hydromt-fiat-dev.yml index 61965ec7..d540c690 100644 --- a/envs/hydromt-fiat-dev.yml +++ b/envs/hydromt-fiat-dev.yml @@ -13,6 +13,7 @@ dependencies: - black - gdal>=3.1 - geopandas + - geopy - hydromt=0.8.0 - hydromt_sfincs - numpy diff --git a/pyproject.toml b/pyproject.toml index e7d5c5e6..85874e03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "hydromt==0.8.0", "hydromt_sfincs", "geopandas", + "geopy", "numpy", "pandas", "pyogrio", From 9260aa0119c974d3453a04fd6b87ff20e512e430 Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 14:35:11 +0100 Subject: [PATCH 2/4] added the us states and counties csv --- hydromt_fiat/data/hydromt_fiat_catalog_USA.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml b/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml index 6dea5121..ba698394 100644 --- a/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml +++ b/hydromt_fiat/data/hydromt_fiat_catalog_USA.yml @@ -40,6 +40,13 @@ social_vulnerability: meta: category: social_vulnerability +us_states_counties: + path: social_vulnerability/us_states_counties.csv + data_type: DataFrame + driver: csv + meta: + category: social_vulnerability + default_hazus_iwr_linking: path: vulnerability_linking/default_hazus_iwr_curve_linking.csv data_type: DataFrame From 3900683d911991c740ced6b0c53bab385d660d1d Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 16:03:19 +0100 Subject: [PATCH 3/4] Implemented automatic geolocating, for the state and county of the exposure data --- hydromt_fiat/api/data_types.py | 4 - hydromt_fiat/api/svi_vm.py | 8 +- hydromt_fiat/fiat.py | 133 +++++++++++------- hydromt_fiat/workflows/equity_data.py | 124 ++++++++-------- hydromt_fiat/workflows/exposure_vector.py | 102 +++++++++----- hydromt_fiat/workflows/gis.py | 62 +++++--- .../workflows/social_vulnerability_index.py | 126 +++++++++-------- hydromt_fiat/workflows/utils.py | 9 ++ tests/test_SVI_exposure.py | 5 +- tests/test_equity_data.py | 14 +- tests/test_vulnerability_exposure.py | 3 - 11 files changed, 345 insertions(+), 245 deletions(-) diff --git a/hydromt_fiat/api/data_types.py b/hydromt_fiat/api/data_types.py index 208745f7..f0d4f89b 100644 --- a/hydromt_fiat/api/data_types.py +++ b/hydromt_fiat/api/data_types.py @@ -99,16 +99,12 @@ class AggregationAreaSettings(BaseModel): class SocialVulnerabilityIndexSettings(BaseModel): census_key: str codebook_fn: str - state_abbreviation: str year_data: int - county: str class EquityDataSettings(BaseModel): census_key: str - state_abbreviation: str year_data: int - county: str class ConfigYaml(BaseModel, extra=Extra.allow): diff --git a/hydromt_fiat/api/svi_vm.py b/hydromt_fiat/api/svi_vm.py index c100725b..e03df57e 100644 --- a/hydromt_fiat/api/svi_vm.py +++ b/hydromt_fiat/api/svi_vm.py @@ -26,19 +26,15 @@ def get_state_names(): dict_states = list_of_states() return list(dict_states.keys()) - def set_svi_settings(self, census_key, state_abbreviation, year_data, county): + def set_svi_settings(self, census_key, year_data): self.svi_model = SocialVulnerabilityIndexSettings( census_key = census_key, codebook_fn = "social_vulnerability", - state_abbreviation = state_abbreviation, year_data 
= year_data,
-            county = county,
        )

-    def set_equity_settings(self, census_key, state_abbreviation, year_data, county):
+    def set_equity_settings(self, census_key, year_data):
        self.equity_model = EquityDataSettings(
            census_key = census_key,
-            state_abbreviation = state_abbreviation,
            year_data = year_data,
-            county = county,
        )
diff --git a/hydromt_fiat/fiat.py b/hydromt_fiat/fiat.py
index 46bc1e3f..50c6ef63 100644
--- a/hydromt_fiat/fiat.py
+++ b/hydromt_fiat/fiat.py
@@ -1,9 +1,7 @@
 """Implement fiat model class"""

 import csv
-import glob
 import logging
-from os.path import basename, join
 from pathlib import Path
 from typing import List, Optional, Union

@@ -26,10 +24,12 @@
     create_risk_dataset,
 )
 from hydromt_fiat.workflows.equity_data import EquityData
-from hydromt_fiat.workflows.social_vulnerability_index import SocialVulnerabilityIndex
+from hydromt_fiat.workflows.social_vulnerability_index import SocialVulnerabilityIndex, list_of_states
 from hydromt_fiat.workflows.vulnerability import Vulnerability
 from hydromt_fiat.workflows.aggregation_areas import join_exposure_aggregation_areas
 from hydromt_fiat.workflows.building_footprints import join_exposure_building_footprints
+from hydromt_fiat.workflows.gis import locate_from_bounding_box
+from hydromt_fiat.workflows.utils import get_us_county_numbers

 __all__ = ["FiatModel"]

@@ -567,11 +567,8 @@ def setup_social_vulnerability_index(
         self,
         census_key: str,
         codebook_fn: Union[str, Path],
-        state_abbreviation: str,
-        user_dataset_fn: str = None,
-        blockgroup_fn: str = None,
         year_data: int = None,
-        county: str = None
+        save_all: bool = False,
     ):
         """Setup the social vulnerability index for the vector exposure data for
         Delft-FIAT. This method has so far only been tested with US Census data
@@ -585,43 +582,58 @@
             The user's unique Census key that they got from the census.gov website
             (https://api.census.gov/data/key_signup.html) to be able to download the
             Census data
-        path : Union[str, Path]
+        codebook_fn : Union[str, Path]
             The path to the codebook excel
-        state_abbreviation : str
-            The abbreviation of the US state one would like to use in the analysis
+        year_data : int
+            The year for which the census data should be downloaded: 2020, 2021, or 2022
+        save_all : bool
+            If True, all (normalized) data variables are saved; if False, only the SVI
+            column is added to the FIAT exposure data. By default False.
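+
+        Examples
+        --------
+        A minimal sketch of a call (the census key below is a placeholder, not
+        a real key)::
+
+            fm.setup_social_vulnerability_index(
+                census_key="<your-census-api-key>",
+                codebook_fn="social_vulnerability",
+                year_data=2021,
+            )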
""" - - # Create SVI object - svi = SocialVulnerabilityIndex(self.data_catalog, self.logger) - - # Call functionalities of SVI - # svi.read_dataset(user_dataset_fn) - svi.set_up_census_key(census_key) - svi.variable_code_csv_to_pd_df(codebook_fn) - svi.set_up_download_codes() - svi.set_up_state_code(state_abbreviation) - svi.download_census_data(year_data) - svi.rename_census_data("Census_code_withE", "Census_variable_name") - svi.identify_no_data() - svi.check_nan_variable_columns("Census_variable_name", "Indicator_code") - svi.check_zeroes_variable_rows() - translation_variable_to_indicator = svi.create_indicator_groups( - "Census_variable_name", "Indicator_code" - ) - svi.processing_svi_data(translation_variable_to_indicator) - svi.normalization_svi_data() - svi.domain_scores() - svi.composite_scores() - svi.match_geo_ID() - svi.download_shp_geom(year_data, county) - svi.merge_svi_data_shp() - - # store the relevant tables coming out of the social vulnerability module - self.set_tables(df=svi.svi_data_shp, name="social_vulnerability_scores") - # TODO: Think about adding an indicator for missing data to the svi.svi_data_shp - # Check if the exposure data exists if self.exposure: + # First find the state(s) and county/counties where the exposure data is + # located in + us_states_counties = self.data_catalog.get_dataframe("us_states_counties") + counties, states = locate_from_bounding_box(self.exposure.bounding_box()) + states_dict = list_of_states() + state_abbreviation = [] + for state in states: + try: + state_abbreviation.append(states_dict[state]) + except KeyError: + self.logger.warning(f"State {state} not found.") + + county_numbers = get_us_county_numbers(counties, us_states_counties) + + # Create SVI object + svi = SocialVulnerabilityIndex(self.data_catalog, self.logger) + + # Call functionalities of SVI + svi.set_up_census_key(census_key) + svi.variable_code_csv_to_pd_df(codebook_fn) + svi.set_up_download_codes() + svi.set_up_state_code(state_abbreviation) + svi.download_census_data(year_data) + svi.rename_census_data("Census_code_withE", "Census_variable_name") + svi.identify_no_data() + svi.check_nan_variable_columns("Census_variable_name", "Indicator_code") + svi.check_zeroes_variable_rows() + translation_variable_to_indicator = svi.create_indicator_groups( + "Census_variable_name", "Indicator_code" + ) + svi.processing_svi_data(translation_variable_to_indicator) + svi.normalization_svi_data() + svi.domain_scores() + svi.composite_scores() + svi.match_geo_ID() + svi.download_shp_geom(year_data, county_numbers) + svi.merge_svi_data_shp() + + # store the relevant tables coming out of the social vulnerability module + self.set_tables(df=svi.svi_data_shp, name="social_vulnerability_scores") + # TODO: Think about adding an indicator for missing data to the svi.svi_data_shp + # Link the SVI score to the exposure data exposure_data = self.exposure.get_full_gdf(self.exposure.exposure_db) exposure_data.sort_values("Object ID") @@ -629,18 +641,27 @@ def setup_social_vulnerability_index( if svi.svi_data_shp.crs != exposure_data.crs: svi.svi_data_shp.to_crs(crs=exposure_data.crs, inplace=True) - svi_exp_joined = gpd.sjoin(exposure_data, svi.svi_data_shp, how="left") + if save_all: + # Clean up the columns that are saved + cols_to_save = list(svi.svi_data_shp.columns) + cols_to_save.remove("GEO_ID") + cols_to_save.remove("GEOID_short") + cols_to_save.remove("NAME") + cols_to_save.remove("composite_SVI") + else: + # Only save the composite_svi_z + cols_to_save = ["composite_svi_z", 
"geometry"] + + svi_exp_joined = gpd.sjoin(exposure_data, svi.svi_data_shp[cols_to_save], how="left") svi_exp_joined.drop(columns=["geometry"], inplace=True) svi_exp_joined = pd.DataFrame(svi_exp_joined) + svi_exp_joined.rename(columns={"composite_svi_z": "SVI"}, inplace=True) self.exposure.exposure_db = svi_exp_joined def setup_equity_data( self, census_key: str, - state_abbreviation: str, - blockgroup_fn: str = None, year_data: int = None, - county: str = None ): """Setup the download procedure for equity data similarly to the SVI setup @@ -654,9 +675,20 @@ def setup_equity_data( Census data path : Union[str, Path] The path to the codebook excel - state_abbreviation : str - The abbreviation of the US state one would like to use in the analysis """ + # First find the state(s) and county/counties where the exposure data is + # located in + us_states_counties = self.data_catalog.get_dataframe("us_states_counties") + counties, states = locate_from_bounding_box(self.exposure.bounding_box()) + states_dict = list_of_states() + state_abbreviation = [] + for state in states: + try: + state_abbreviation.append(states_dict[state]) + except KeyError: + self.logger.warning(f"State {state} not found.") + + county_numbers = get_us_county_numbers(counties, us_states_counties) # Create equity object equity = EquityData(self.data_catalog, self.logger) @@ -668,7 +700,7 @@ def setup_equity_data( equity.download_census_data(year_data) equity.rename_census_data() equity.match_geo_ID() - equity.download_shp_geom(year_data, county) + equity.download_shp_geom(year_data, county_numbers) equity.merge_equity_data_shp() self.set_tables(df=equity.equity_data_shp, name="equity_data") @@ -886,8 +918,11 @@ def write_tables(self) -> None: # The default location and save settings of the vulnerability curves fn = "vulnerability/vulnerability_identifiers.csv" kwargs = {"index": False} - elif "social_vulnerability" in name: - fn = f"exposure/{name}.csv" + elif name == "social_vulnerability_scores": + fn = f"exposure/SVI/{name}.csv" + kwargs = {"index": False} + elif name == "equity_data": + fn = f"exposure/equity/{name}.csv" kwargs = {"index": False} # Other, can also return an error or pass silently diff --git a/hydromt_fiat/workflows/equity_data.py b/hydromt_fiat/workflows/equity_data.py index 2a4dc14c..426f8fc4 100644 --- a/hydromt_fiat/workflows/equity_data.py +++ b/hydromt_fiat/workflows/equity_data.py @@ -1,33 +1,32 @@ from census import Census # install in your environment using pip install Census from us import states # install in your environment using pip install us from hydromt.data_catalog import DataCatalog -from hydromt.log import setuplog -import logging +from logging import Logger import pandas as pd -import numpy as np import geopandas as gpd from urllib.request import urlopen from io import BytesIO from zipfile import ZipFile from pathlib import Path - +from typing import List class EquityData: - def __init__(self, data_catalog: DataCatalog = None, logger: logging.Logger = None): + def __init__(self, data_catalog: DataCatalog, logger: Logger): self.data_catalog = data_catalog self.census_key = Census self.download_codes = {} - self.state_fips = 0 + self.state_fips = [] self.pd_census_data = pd.DataFrame() self.codebook = pd.DataFrame() self.indicator_groups = {} self.processed_census_data = pd.DataFrame() self.pd_domain_scores_geo = pd.DataFrame() - self.logger = setuplog("SVI", log_level=10) + self.logger = logger self.svi_data_shp = gpd.GeoDataFrame() self.block_groups = gpd.GeoDataFrame() + 
self.temp_folder = []

     def set_up_census_key(self, census_key: str):
@@ -45,8 +44,7 @@
             f"your census key {census_key} is used to download data from the Census website "
         )

-
-    def set_up_state_code(self, state_abbreviation: str):
+    def set_up_state_code(self, state_abbreviation: List[str]):
         """download census data for a state

         Parameters
@@ -54,12 +52,10 @@
-        state_abbreviation : str
-            Abbreviation of the state for which you want to set up the census data download
+        state_abbreviation : List[str]
+            Abbreviations of the states for which you want to set up the census data download
         """
-        state = [
-            state_abbreviation
-        ]  # read in the state abbreviation as specified in the ini file
-        state_obj = getattr(states, state[0])
-        self.state_fips = state_obj.fips
-        self.logger.info(f"The state abbreviation specified is: {state_abbreviation}")
+        for state in state_abbreviation:
+            self.logger.info(f"The state abbreviation specified is: {state}")
+            state_obj = getattr(states, state)
+            self.state_fips.append(state_obj.fips)

     def variables_to_download(self):
         self.download_variables = ['B01003_001E', 'B19301_001E', 'NAME', 'GEO_ID'] # TODO: later make this a user input?

     def download_census_data(self, year_data):
         """download the census data
         it is possible to also make the county, tract and blockgroup flexible so that a user could specify exactly what to download
-        But: bear in mind, with social vulneraiblity we compare areas against each other, so Ideally you would have a large enough dataset (for statistical and validity purposes)
         """
-        download_census_codes = self.census_key.acs.state_county_blockgroup(
-            fields=self.download_variables,
-            state_fips=self.state_fips,
-            county_fips="*",
-            tract="*",
-            blockgroup="*",
-            year=year_data
-        )
-        self.pd_census_data = pd.DataFrame(download_census_codes)
+        dfs = []
+        for sf in self.state_fips:
+            download_census_codes = self.census_key.acs.state_county_blockgroup(
+                fields=self.download_variables,
+                state_fips=sf,
+                county_fips="*",
+                tract="*",
+                blockgroup="*",
+                year=year_data
+            )
+            dfs.append(pd.DataFrame(download_census_codes))
+
+        self.pd_census_data = pd.concat(dfs)
         self.logger.info(
-            "The equity data was succesfully downloaded from the Census website"
+            "The equity data was successfully downloaded from the Census website"
         )
-        return self.pd_census_data

     def rename_census_data(self):
         """renaming the columns so that they have variable names instead of variable codes as their headers
@@ -136,51 +134,53 @@ def download_and_unzip(self, url, extract_to='.'):
         except Exception as e:
             print(f"Error during download and unzip: {e}")

-    def download_shp_geom(self, year_data, county):
+    def download_shp_geom(self, year_data: int, counties: List[str]):
         """Downloading the shapefiles from the government Tiger website

         Parameters
         ----------
         year_data : int
             The year for which you want to download the census data and the corresponding shapefiles (for geometry)
-        county : int
-            the county code in which your area of interest lies
+        counties : List[str]
+            A list of county codes in which your area of interest lies
         """
-        # Download shapefile of blocks
-        if year_data == 2022:
-            url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{self.state_fips}{county}_faces.zip"
+        block_groups_list = []
+        for sf, county in zip(self.state_fips, counties):
+            # Download shapefile of blocks
+            if year_data == 2022:
+                url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{sf}{county}_faces.zip"
+            elif year_data == 2021:
+                url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{sf}{county}_faces.zip"
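+                # note: the 2020 vintage below sits under TIGER2020PL and, like 2022, has an extra LAYER path segment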
+ elif year_data == 2020: + url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{sf}{county}_faces.zip" + else: + self.logger.warning(f"Year {year_data} not available from 'https://www2.census.gov/geo/tiger'") + return + + # Save shapefiles + folder = f'Shapefiles/{sf}{county}/{year_data}' + self.temp_folder.append(folder) + self.logger.info(f"Downloading the county shapefile for {str(year_data)}") + self.download_and_unzip(url, folder) + shapefiles = list(Path(folder).glob("*.shp")) + if shapefiles: + shp = gpd.read_file(shapefiles[0]) + self.logger.info("The shapefile was downloaded") + else: + print("No shapefile found in the directory.") + + # Dissolve shapefile based on block groups code = "20" - self.logger.info("Downloading the county shapefile for 2022") - elif year_data == 2021: - url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2021") - elif year_data == 2020: - url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2020") - else: - print("year not supported") - return - # Save shapefiles - fold_name = f'Shapefiles/{self.state_fips}{county}/{year_data}' - self.download_and_unzip(url, fold_name) - shapefiles = list(Path(fold_name).glob("*.shp")) - if shapefiles: - self.shp = gpd.read_file(shapefiles[0]) - self.logger.info("The shapefile was downloaded") - else: - print("No shapefile found in the directory.") - - # Dissolve shapefile based on block groups - attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] - attrs = [attr + code for attr in attrs] - - self.block_groups = self.shp.dissolve(by=attrs, as_index=False) - self.block_groups = self.block_groups[attrs + ["geometry"]] - # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) - self.block_groups["GEO_ID"] = "1500000US" + self.block_groups['STATEFP' + code].astype(str) + self.block_groups['COUNTYFP' + code].astype(str) + self.block_groups['TRACTCE' + code].astype(str) + self.block_groups['BLKGRPCE' + code].astype(str) + attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] + attrs = [attr + code for attr in attrs] + + block_groups_shp = shp.dissolve(by=attrs, as_index=False) + block_groups_shp = block_groups_shp[attrs + ["geometry"]] + # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) + block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str) + block_groups_list.append(block_groups_shp) + self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list)) def merge_equity_data_shp(self): """Merges the geometry data with the equity_data downloaded""" diff --git a/hydromt_fiat/workflows/exposure_vector.py b/hydromt_fiat/workflows/exposure_vector.py index 8b98e3ad..b949b82d 100644 --- a/hydromt_fiat/workflows/exposure_vector.py +++ b/hydromt_fiat/workflows/exposure_vector.py @@ -1,5 +1,4 @@ import json -import rasterio import logging from pathlib import Path from typing import Any, List, Optional, Union @@ -33,9 +32,7 @@ ) from hydromt_fiat.workflows.roads import get_max_potential_damage_roads -from rasterio.features import 
rasterize -from xrspatial import zonal_stats -import xarray as xr + class ExposureVector(Exposure): _REQUIRED_COLUMNS = ["Object ID", "Extraction Method", "Ground Floor Height"] @@ -92,6 +89,11 @@ def __init__( self.unit = unit self._geom_names = list() # A list of (original) names of the geometry (files) + def bounding_box(self): + if len(self.exposure_geoms) > 0: + gdf = gpd.GeoDataFrame(pd.concat(self.exposure_geoms, ignore_index=True)) + return gdf.total_bounds + def read_table(self, fn: Union[str, Path]): """Read the Delft-FIAT exposure data. @@ -195,15 +197,21 @@ def setup_buildings_from_single_source( self.setup_extraction_method(extraction_method) # Set the exposure_geoms - self.set_exposure_geoms(gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]], crs=self.crs)) + self.set_exposure_geoms( + gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]], crs=self.crs) + ) # Set the name to the geom_names self.set_geom_names("buildings") # Set the ground floor height if not yet set - #TODO: Check a better way to access to to the geometries, self.empousure_geoms is a list an not a geodataframe + # TODO: Check a better way to access to to the geometries, self.empousure_geoms is a list an not a geodataframe if ground_elevation_file is not None: - self.setup_ground_elevation(ground_elevation_file, self.exposure_db, gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]])) + self.setup_ground_elevation( + ground_elevation_file, + self.exposure_db, + gpd.GeoDataFrame(self.exposure_db[["Object ID", "geometry"]]), + ) # Remove the geometry column from the exposure_db if "geometry" in self.exposure_db: @@ -217,7 +225,9 @@ def setup_roads( ): self.logger.info("Setting up roads...") if str(source).upper() == "OSM": - polygon = self.region["geometry"].values[0] #TODO check if this works each time + polygon = self.region["geometry"].values[ + 0 + ] # TODO check if this works each time roads = get_roads_from_osm(polygon, road_types) if roads.empty: @@ -247,7 +257,9 @@ def setup_roads( # Add the max potential damage and the length of the segments to the roads road_damage = self.data_catalog.get_dataframe(road_damage) - roads[["Max Potential Damage: Structure", "Segment Length [m]"]] = get_max_potential_damage_roads(roads, road_damage) + roads[ + ["Max Potential Damage: Structure", "Segment Length [m]"] + ] = get_max_potential_damage_roads(roads, road_damage) self.set_exposure_geoms(roads[["Object ID", "geometry"]]) self.set_geom_names("roads") @@ -275,7 +287,9 @@ def setup_buildings_from_multiple_sources( self.setup_max_potential_damage(max_potential_damage, damage_types, country) self.setup_ground_floor_height(ground_floor_height) self.setup_extraction_method(extraction_method) - self.setup_ground_elevation(ground_elevation_file, self.exposure_db, self.get_full_gdf(self.exposure_db)) + self.setup_ground_elevation( + ground_elevation_file, self.exposure_db, self.get_full_gdf(self.exposure_db) + ) def setup_asset_locations(self, asset_locations: str) -> None: """Set up the asset locations (points or polygons). @@ -519,7 +533,7 @@ def setup_ground_floor_height( be either 'nearest' (nearest neighbor) or 'intersection'. By default 'nearest'. max_dist : float - The maximum distance for the nearest join measured in meters, by default + The maximum distance for the nearest join measured in meters, by default set to 10 meters. 
""" if ground_floor_height: @@ -535,7 +549,9 @@ def setup_ground_floor_height( # A single file is used to assign the ground floor height to the assets gfh = self.data_catalog.get_geodataframe(ground_floor_height) gdf = self.get_full_gdf(self.exposure_db) - gdf = join_spatial_data(gdf, gfh, attr_name, method, max_dist, self.logger) + gdf = join_spatial_data( + gdf, gfh, attr_name, method, max_dist, self.logger + ) self.exposure_db = self._set_values_from_other_column( gdf, "Ground Floor Height", attr_name ) @@ -549,7 +565,9 @@ def setup_ground_floor_height( def setup_max_potential_damage( self, - max_potential_damage: Union[int, float, str, Path, List[str], List[Path], pd.DataFrame]=None, + max_potential_damage: Union[ + int, float, str, Path, List[str], List[Path], pd.DataFrame + ] = None, damage_types: Union[List[str], str, None] = None, country: Union[str, None] = None, target_attribute: Union[str, List[str], None] = None, @@ -581,23 +599,27 @@ def setup_max_potential_damage( if isinstance(damage_types, str): damage_types = [damage_types] - - if isinstance(max_potential_damage, pd.DataFrame - ): - self.update_max_potential_damage( - updated_max_potential_damages=max_potential_damage - ) + + if isinstance(max_potential_damage, pd.DataFrame): + self.update_max_potential_damage( + updated_max_potential_damages=max_potential_damage + ) elif isinstance(max_potential_damage, int) or isinstance( max_potential_damage, float ): # Set the column(s) to a single value for damage_type in damage_types: - self.exposure_db[f"Max Potential Damage: {damage_type}"] = max_potential_damage + self.exposure_db[ + f"Max Potential Damage: {damage_type}" + ] = max_potential_damage elif isinstance(max_potential_damage, list): # Multiple files are used to assign the ground floor height to the assets NotImplemented - elif max_potential_damage in ["jrc_damage_values", "hazus_max_potential_damages"]: + elif max_potential_damage in [ + "jrc_damage_values", + "hazus_max_potential_damages", + ]: if max_potential_damage == "jrc_damage_values": damage_source = self.data_catalog.get_dataframe(max_potential_damage) if country is None: @@ -611,9 +633,11 @@ def setup_max_potential_damage( elif max_potential_damage == "hazus_max_potential_damages": damage_source = self.data_catalog.get_dataframe(max_potential_damage) damage_values = preprocess_hazus_damage_values(damage_source) - + # Calculate the area of each object - gdf = self.get_full_gdf(self.exposure_db)[["Primary Object Type", "geometry"]] + gdf = self.get_full_gdf(self.exposure_db)[ + ["Primary Object Type", "geometry"] + ] gdf = get_area(gdf) # Set the damage values to the exposure data @@ -623,7 +647,8 @@ def setup_max_potential_damage( self.exposure_db[ f"Max Potential Damage: {damage_type.capitalize()}" ] = [ - damage_values[building_type][damage_type.lower()] * square_meters + damage_values[building_type][damage_type.lower()] + * square_meters for building_type, square_meters in zip( gdf["Primary Object Type"], gdf["area"] ) @@ -637,7 +662,7 @@ def setup_max_potential_damage( max_potential_damage, Path ): # When the max_potential_damage is a string but not jrc_damage_values - # or hazus_max_potential_damages. Here, a single file is used to + # or hazus_max_potential_damages. 
Here, a single file is used to # assign the ground floor height to the assets gfh = self.data_catalog.get_geodataframe(max_potential_damage) gdf = self.get_full_gdf(self.exposure_db) @@ -652,17 +677,18 @@ def setup_ground_elevation( exposure_db: pd.DataFrame, exposure_geoms: gpd.GeoDataFrame, ) -> None: - if ground_elevation: ground_elevation_from_dem( ground_elevation=ground_elevation, exposure_db=exposure_db, exposure_geoms=exposure_geoms, ) - + else: - print('Ground elevation is not recognized by the setup_ground_elevation function\n Ground elevation will be set to 0') - exposure_db['Ground Elevation'] = 0 + print( + "Ground elevation is not recognized by the setup_ground_elevation function\n Ground elevation will be set to 0" + ) + exposure_db["Ground Elevation"] = 0 def update_max_potential_damage( self, updated_max_potential_damages: pd.DataFrame @@ -884,7 +910,9 @@ def setup_new_composite_areas( elevation_reference: str, path_ref: str = None, attr_ref: str = None, - ground_elevation: Union[int, float, None, str, Path, List[str], List[Path]]=None, + ground_elevation: Union[ + int, float, None, str, Path, List[str], List[Path] + ] = None, ) -> None: """Adds one or multiple (polygon) areas to the exposure database with a composite damage function and a percentage of the total damage. @@ -924,7 +952,9 @@ def setup_new_composite_areas( percent_growth = float(percent_growth) / 100 geom_file = Path(geom_file) - assert geom_file.is_file(), f"File {str(geom_file)} is missing, cannot set up a new composite area." + assert ( + geom_file.is_file() + ), f"File {str(geom_file)} is missing, cannot set up a new composite area." # Calculate the total damages for the new object, for the indicated damage types new_object_damages = self.calculate_damages_new_exposure_object( @@ -1047,8 +1077,8 @@ def setup_new_composite_areas( ) # Adding elevation data into the new objects - self.setup_ground_elevation(ground_elevation,new_objects,_new_exposure_geoms) - + self.setup_ground_elevation(ground_elevation, new_objects, _new_exposure_geoms) + # Update the exposure_db self.exposure_db = pd.concat([self.exposure_db, new_objects]).reset_index( drop=True @@ -1073,7 +1103,9 @@ def link_exposure_vulnerability( linking_per_damage_type = exposure_linking_table.loc[ exposure_linking_table["Damage Type"] == damage_type, : ] - assert not linking_per_damage_type.empty, f"Damage type {damage_type} not found in the exposure-vulnerability linking table" + assert ( + not linking_per_damage_type.empty + ), f"Damage type {damage_type} not found in the exposure-vulnerability linking table" # Create a dictionary that links the exposure data to the vulnerability data linking_dict = dict( @@ -1473,4 +1505,4 @@ def _set_values_from_other_column( df[col_to_copy].notna(), col_to_copy ] del df[col_to_copy] - return df \ No newline at end of file + return df diff --git a/hydromt_fiat/workflows/gis.py b/hydromt_fiat/workflows/gis.py index 3411bfab..2ccaefb6 100644 --- a/hydromt_fiat/workflows/gis.py +++ b/hydromt_fiat/workflows/gis.py @@ -9,6 +9,7 @@ import numpy as np import xarray as xr from pathlib import Path +from geopy.geocoders import Nominatim def get_area(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -197,9 +198,11 @@ def join_spatial_data( ) except AssertionError as e: logger.warning(e) - logger.warning("Reprojecting the GeoDataFrame from " - f"{get_crs_str_from_gdf(right_gdf.crs)} to " - f"{get_crs_str_from_gdf(left_gdf.crs)}.") + logger.warning( + "Reprojecting the GeoDataFrame from " + 
f"{get_crs_str_from_gdf(right_gdf.crs)} to " + f"{get_crs_str_from_gdf(left_gdf.crs)}." + ) right_gdf = right_gdf.to_crs(left_gdf.crs) left_gdf_type = check_geometry_type(left_gdf) @@ -243,41 +246,62 @@ def ground_elevation_from_dem( exposure_db: pd.DataFrame, exposure_geoms: gpd.GeoDataFrame, ) -> None: - # This function was developed by Willem https://github.com/interTwin-eu/DT-flood/blob/DemonstrationNotebooks/Notebooks/SetupFIAT.ipynb - #TODO: Find equivalent functions in hydromt + # TODO: Find equivalent functions in hydromt # Read in the DEM dem = rasterio.open(ground_elevation) - # gdf = self.get_full_gdf(exposure_db) + # gdf = self.get_full_gdf(exposure_db) gdf = exposure_geoms.to_crs(dem.crs.data) # Create a list of geometries plus a label for rasterize # The labels start at 1 since the label 0 is reserved for everything not in a geometry # The order of each tuple is (geometry,label) - shapes = list(enumerate(gdf['geometry'].values)) - shapes = [(t[1],t[0]+1) for t in shapes] + shapes = list(enumerate(gdf["geometry"].values)) + shapes = [(t[1], t[0] + 1) for t in shapes] rasterized = rasterize( - shapes=shapes, - out_shape=dem.shape, - transform=dem.transform, - all_touched=True + shapes=shapes, out_shape=dem.shape, transform=dem.transform, all_touched=True ) # zonal_stats needs xarrays as input rasterized = xr.DataArray(rasterized) # Calculate the zonal statistics - zonal_out = zonal_stats(rasterized,xr.DataArray(dem.read(1)), - stats_funcs=['mean'], - nodata_values=np.nan) + zonal_out = zonal_stats( + rasterized, + xr.DataArray(dem.read(1)), + stats_funcs=["mean"], + nodata_values=np.nan, + ) # The zero label is for pixels not in a geometry so we discard them zonal_out = zonal_out.drop(0) # Array storing the zonal means # Store the calculated means at index corresponding to their label - zonal_means = np.full(len(shapes),np.nan) - zonal_means[[zonal_out['zone'].values-1]] = zonal_out['mean'].values + zonal_means = np.full(len(shapes), np.nan) + zonal_means[[zonal_out["zone"].values - 1]] = zonal_out["mean"].values # # Add Ground Elevation column and get rid of nans in the appropriate way - exposure_db['Ground Elevation'] = zonal_means - exposure_db['Ground Elevation'].bfill(inplace=True) + exposure_db["Ground Elevation"] = zonal_means + exposure_db["Ground Elevation"].bfill(inplace=True) + + +def locate_from_bounding_box(bounding_box): + geolocator = Nominatim(user_agent="hydromt-fiat") + + search = [ + (bounding_box[1], bounding_box[0]), + (bounding_box[1], bounding_box[2]), + (bounding_box[3], bounding_box[0]), + (bounding_box[3], bounding_box[2]), + ] + locations = [geolocator.reverse(s) for s in search] + locations_list = [location[0].split(", ") for location in locations] + locations_list_no_numbers = [[y for y in x if not y.isnumeric()] for x in locations_list] + counties = [y for x in locations_list for y in x if "county" in y.lower()] + states = [x[-2] for x in locations_list_no_numbers] + + counties_states_combo = set(list(zip(counties, states))) + counties = [c[0] for c in counties_states_combo] + states = [c[1] for c in counties_states_combo] + + return counties, states diff --git a/hydromt_fiat/workflows/social_vulnerability_index.py b/hydromt_fiat/workflows/social_vulnerability_index.py index de68b9ca..c838007b 100644 --- a/hydromt_fiat/workflows/social_vulnerability_index.py +++ b/hydromt_fiat/workflows/social_vulnerability_index.py @@ -1,8 +1,7 @@ from census import Census # install in your environment using pip install Census from us import states # install in 
your environment using pip install us from hydromt.data_catalog import DataCatalog -from hydromt.log import setuplog -import logging +from logging import Logger import pandas as pd import numpy as np import geopandas as gpd @@ -10,6 +9,7 @@ from io import BytesIO from zipfile import ZipFile from pathlib import Path +from typing import List def list_of_states(): states = { @@ -71,11 +71,11 @@ def list_of_states(): return states_inverted class SocialVulnerabilityIndex: - def __init__(self, data_catalog: DataCatalog = None, logger: logging.Logger = None): + def __init__(self, data_catalog: DataCatalog, logger: Logger): self.data_catalog = data_catalog self.census_key = Census self.download_codes = {} - self.state_fips = 0 + self.state_fips = [] self.pd_census_data = pd.DataFrame() self.codebook = pd.DataFrame() self.indicator_groups = {} @@ -87,9 +87,10 @@ def __init__(self, data_catalog: DataCatalog = None, logger: logging.Logger = No self.pd_domain_scores = pd.DataFrame() self.pd_domain_scores_z = pd.DataFrame() self.pd_domain_scores_geo = pd.DataFrame() - self.logger = setuplog("SVI", log_level=10) + self.logger = logger self.svi_data_shp = gpd.GeoDataFrame() self.block_groups = gpd.GeoDataFrame() + self.temp_folder = [] def read_dataset(self, path: str): """If you have your own dataset (e.g. already downloaded census data), you can use this to load it from a csv @@ -132,7 +133,7 @@ def set_up_download_codes(self): self.download_codes = list(self.codebook["Census_code_withE"]) self.download_codes.append("NAME") - def set_up_state_code(self, state_abbreviation: str): + def set_up_state_code(self, state_abbreviation: List[str]): """download census data for a state Parameters @@ -140,31 +141,33 @@ def set_up_state_code(self, state_abbreviation: str): state_abbreviation : str Abbreviation of the state for which you want to set up the census data download """ - state = [ - state_abbreviation - ] # read in the state abbreviation as specified in the ini file - state_obj = getattr(states, state[0]) - self.state_fips = state_obj.fips - self.logger.info(f"The state abbreviation specified is: {state_abbreviation}") + for state in state_abbreviation: + self.logger.info(f"The state abbreviation specified is: {state}") + state_obj = getattr(states, state) + self.state_fips.append(state_obj.fips) + def download_census_data(self, year_data): """download the census data it is possible to also make the county, tract and blockgroup flexible so that a user could specify exactly what to download But: bear in mind, with social vulneraiblity we compare areas against each other, so Ideally you would have a large enough dataset (for statistical and validity purposes) """ - download_census_codes = self.census_key.acs.state_county_blockgroup( - fields=self.download_codes, - state_fips=self.state_fips, - county_fips="*", - tract="*", - blockgroup="*", - year=year_data - ) - self.pd_census_data = pd.DataFrame(download_census_codes) + dfs = [] + for sf in self.state_fips: + download_census_codes = self.census_key.acs.state_county_blockgroup( + fields=self.download_codes, + state_fips=sf, + county_fips="*", + tract="*", + blockgroup="*", + year=year_data + ) + dfs.append(pd.DataFrame(download_census_codes)) + + self.pd_census_data = pd.concat(dfs) self.logger.info( "The census data was successfully downloaded using the specified variables in the codebook Excel" ) - return self.pd_census_data def rename_census_data(self, from_name: str, to_name: str): """renaming the columns so that they have variable names instead of 
variable codes as their headers @@ -441,7 +444,7 @@ def normalization_svi_data(self): ) svi_fiat_names.append(column + "_norm") else: - print("Direction is not provided") + self.logger.warning(f"Normalization direction for {column} is not provided") self.logger.info( "The data is standardized according to their relation specified in the codebook Excel (inverse or normal z-scores)" ) @@ -549,52 +552,55 @@ def download_and_unzip(self, url, extract_to='.'): zipfile = ZipFile(BytesIO(http_response.read())) zipfile.extractall(path=extract_to) except Exception as e: - print(f"Error during download and unzip: {e}") + self.logger.warning(f"Error during download and unzip: {e}") - def download_shp_geom(self, year_data, county): + def download_shp_geom(self, year_data: int, counties: List[str]): """Downloading the shapefiles from the government Tiger website Parameters ---------- year_data : int The year for which you want to download the census data and the corresponding shapefiles (for geometry) - county : int - the county code in which your area of interest lies + counties : List[str] + A list of county codes in which your area of interest lies """ - # Download shapefile of blocks - if year_data == 2022: - url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2022") - elif year_data == 2021: - url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{self.state_fips}{county}_faces.zip" - code = "20" - self.logger.info("Downloading the county shapefile for 2021") - elif year_data == 2020: - url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{self.state_fips}{county}_faces.zip" + block_groups_list = [] + for sf, county in zip(self.state_fips, counties): + # Download shapefile of blocks + if year_data == 2022: + url = f"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/FACES/tl_rd22_{sf}{county}_faces.zip" + elif year_data == 2021: + url = f"https://www2.census.gov/geo/tiger/TIGER2021/FACES/tl_2021_{sf}{county}_faces.zip" + elif year_data == 2020: + url = f"https://www2.census.gov/geo/tiger/TIGER2020PL/LAYER/FACES/tl_2020_{sf}{county}_faces.zip" + else: + self.logger.warning(f"Year {year_data} not available from 'https://www2.census.gov/geo/tiger'") + return + + # Save shapefiles + folder = f'Shapefiles/{sf}{county}/{year_data}' + self.temp_folder.append(folder) + self.logger.info(f"Downloading the county shapefile for {str(year_data)}") + self.download_and_unzip(url, folder) + shapefiles = list(Path(folder).glob("*.shp")) + if shapefiles: + shp = gpd.read_file(shapefiles[0]) + self.logger.info("The shapefile was downloaded") + else: + self.logger.warning("No county shapefile found in the directory.") + + # Dissolve shapefile based on block groups code = "20" - self.logger.info("Downloading the county shapefile for 2020") - else: - print("year not supported") - return - # Save shapefiles - fold_name = f'Shapefiles/{self.state_fips}{county}/{year_data}' - self.download_and_unzip(url, fold_name) - shapefiles = list(Path(fold_name).glob("*.shp")) - if shapefiles: - self.shp = gpd.read_file(shapefiles[0]) - self.logger.info("The shapefile was downloaded") - else: - print("No shapefile found in the directory.") - - # Dissolve shapefile based on block groups - attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] - attrs = [attr + code for attr in attrs] - - self.block_groups = self.shp.dissolve(by=attrs, as_index=False) - self.block_groups = 
self.block_groups[attrs + ["geometry"]] - # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) - self.block_groups["GEO_ID"] = "1500000US" + self.block_groups['STATEFP' + code].astype(str) + self.block_groups['COUNTYFP' + code].astype(str) + self.block_groups['TRACTCE' + code].astype(str) + self.block_groups['BLKGRPCE' + code].astype(str) + attrs = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"] + attrs = [attr + code for attr in attrs] + + block_groups_shp = shp.dissolve(by=attrs, as_index=False) + block_groups_shp = block_groups_shp[attrs + ["geometry"]] + # block_groups["Census_Bg"] = block_groups['TRACTCE' + code].astype(str) + "-block" + block_groups['BLKGRPCE' + code].astype(str) + block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str) + block_groups_list.append(block_groups_shp) + + self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list)) def merge_svi_data_shp(self): diff --git a/hydromt_fiat/workflows/utils.py b/hydromt_fiat/workflows/utils.py index 69d9d45a..c6c4511e 100644 --- a/hydromt_fiat/workflows/utils.py +++ b/hydromt_fiat/workflows/utils.py @@ -1,3 +1,6 @@ +import pandas as pd + + def detect_delimiter(csvFile): """From stackoverflow https://stackoverflow.com/questions/16312104/can-i-import-a-csv-file-and-automatically-infer-the-delimiter @@ -10,3 +13,9 @@ def detect_delimiter(csvFile): return "," # default delimiter return "," + + +def get_us_county_numbers(county_names: list, states_counties_table: pd.DataFrame) -> list: + county_numbers = list(states_counties_table.loc[states_counties_table["COUNTYNAME"].isin(county_names), "COUNTYFP"].values) + county_numbers = [str(cn).zfill(3) for cn in county_numbers] + return county_numbers \ No newline at end of file diff --git a/tests/test_SVI_exposure.py b/tests/test_SVI_exposure.py index 70c4f87a..2f0c9b82 100644 --- a/tests/test_SVI_exposure.py +++ b/tests/test_SVI_exposure.py @@ -61,10 +61,7 @@ "setup_social_vulnerability_index": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", "codebook_fn": "social_vulnerability", - "state_abbreviation": "SC", - "blockgroup_fn": "blockgroup_shp_data", "year_data": 2021, - "county": "019" }, }, } @@ -92,7 +89,7 @@ def test_SVI_exposure(case): assert root.joinpath("exposure", "region.gpkg").exists() # Check if the social vulnerability data exists - assert root.joinpath("exposure", "social_vulnerability_scores.csv").exists() + assert root.joinpath("exposure", "SVI", "social_vulnerability_scores.csv").exists() # Check if the vulnerability data exists assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists() \ No newline at end of file diff --git a/tests/test_equity_data.py b/tests/test_equity_data.py index 25395c9e..6ffee54d 100644 --- a/tests/test_equity_data.py +++ b/tests/test_equity_data.py @@ -24,9 +24,7 @@ "setup_equity_data": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", - "state_abbreviation": "SC", "year_data": 2021, - "county": "019" }, }, } @@ -43,6 +41,16 @@ def test_equity_data(case): data_libs = EXAMPLEDIR.joinpath(_cases[case]["data_catalogue"]) fm = FiatModel(root=root, mode="w", data_libs=data_libs, logger=logger) - fm.build(opt=_cases[case]["configuration"]) fm.write() + + # Check if the exposure data exists + assert root.joinpath("exposure", "buildings.gpkg").exists() + 
assert root.joinpath("exposure", "exposure.csv").exists() + assert root.joinpath("exposure", "region.gpkg").exists() + + # Check if the equity data exists + assert root.joinpath("exposure", "equity", "equity_data.csv").exists() + + # Check if the vulnerability data exists + assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists() \ No newline at end of file diff --git a/tests/test_vulnerability_exposure.py b/tests/test_vulnerability_exposure.py index 558acc98..e8ec88d4 100644 --- a/tests/test_vulnerability_exposure.py +++ b/tests/test_vulnerability_exposure.py @@ -91,10 +91,7 @@ "setup_social_vulnerability_index": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", "codebook_fn": "social_vulnerability", - "state_abbreviation": "SC", - "blockgroup_fn": "blockgroup_shp_data", "year_data": 2021, - "county": "019" }, }, "region": _region, From 92e27cc191d6e5f74dbacc6fed3fbc6506c06d7a Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 8 Dec 2023 16:17:56 +0100 Subject: [PATCH 4/4] Updating tests, cleaning data, adding different folders in the output --- hydromt_fiat/fiat.py | 2 +- hydromt_fiat/workflows/equity_data.py | 3 ++ .../workflows/social_vulnerability_index.py | 3 ++ tests/test_equity_data.py | 44 +++++++++++++++++-- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/hydromt_fiat/fiat.py b/hydromt_fiat/fiat.py index 50c6ef63..6ee2104a 100644 --- a/hydromt_fiat/fiat.py +++ b/hydromt_fiat/fiat.py @@ -922,7 +922,7 @@ def write_tables(self) -> None: fn = f"exposure/SVI/{name}.csv" kwargs = {"index": False} elif name == "equity_data": - fn = f"exposure/equity/{name}.csv" + fn = f"equity/{name}.csv" kwargs = {"index": False} # Other, can also return an error or pass silently diff --git a/hydromt_fiat/workflows/equity_data.py b/hydromt_fiat/workflows/equity_data.py index 426f8fc4..236ab642 100644 --- a/hydromt_fiat/workflows/equity_data.py +++ b/hydromt_fiat/workflows/equity_data.py @@ -187,6 +187,9 @@ def merge_equity_data_shp(self): self.equity_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left") self.equity_data_shp = gpd.GeoDataFrame(self.equity_data_shp) + # Delete the rows that do not have a geometry column + self.equity_data_shp = self.equity_data_shp.loc[self.equity_data_shp["geometry"].notnull()] + #self.svi_data_shp.drop(columns=columns_to_drop, inplace=True) self.equity_data_shp = self.equity_data_shp.to_crs(epsg=4326) self.logger.info( diff --git a/hydromt_fiat/workflows/social_vulnerability_index.py b/hydromt_fiat/workflows/social_vulnerability_index.py index c838007b..4713040d 100644 --- a/hydromt_fiat/workflows/social_vulnerability_index.py +++ b/hydromt_fiat/workflows/social_vulnerability_index.py @@ -608,6 +608,9 @@ def merge_svi_data_shp(self): self.svi_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left") self.svi_data_shp = gpd.GeoDataFrame(self.svi_data_shp) + # Delete the rows that do not have a geometry column + self.svi_data_shp = self.svi_data_shp.loc[self.svi_data_shp["geometry"].notnull()] + #self.svi_data_shp.drop(columns=columns_to_drop, inplace=True) self.svi_data_shp = self.svi_data_shp.to_crs(epsg=4326) self.logger.info( diff --git a/tests/test_equity_data.py b/tests/test_equity_data.py index 6ffee54d..005cac71 100644 --- a/tests/test_equity_data.py +++ b/tests/test_equity_data.py @@ -10,10 +10,33 @@ ) DATADIR = Path().absolute() / "hydromt_fiat" / "data" +_region = { + "type": "FeatureCollection", + 
"features": [ + { + "type": "Feature", + "properties": {}, + "geometry": { + "coordinates": [ + [ + [-80.0808, 32.7005], + [-79.8756, 32.8561], + [-79.8756, 32.7005], + [-80.0808, 32.8561], + [-80.0808, 32.7005], + ] + ], + "type": "Polygon", + }, + } + ], +} + _cases = { "Test_equity_data": { "data_catalogue": DATADIR / "hydromt_fiat_catalog_USA.yml", "folder": "Test_equity_data", + "region": _region, "configuration": { "setup_global_settings": {"crs": "epsg:4326"}, "setup_output": { @@ -21,7 +44,20 @@ "output_csv_name": "output.csv", "output_vector_name": "spatial.gpkg", }, - + "setup_vulnerability": { + "vulnerability_fn": "default_vulnerability_curves", + "vulnerability_identifiers_and_linking_fn": "default_hazus_iwr_linking", + "functions_mean": "default", + "functions_max": ["AGR1"], + "unit": "feet", + }, + "setup_exposure_buildings": { + "asset_locations": "NSI", + "occupancy_type": "NSI", + "max_potential_damage": "NSI", + "ground_floor_height": "NSI", + "unit": "ft", + }, "setup_equity_data": { "census_key": "495a349ce22bdb1294b378fb199e4f27e57471a9", "year_data": 2021, @@ -30,6 +66,7 @@ } } + @pytest.mark.parametrize("case", list(_cases.keys())) # @pytest.mark.skip(reason="Needs to be updated") def test_equity_data(case): @@ -41,7 +78,8 @@ def test_equity_data(case): data_libs = EXAMPLEDIR.joinpath(_cases[case]["data_catalogue"]) fm = FiatModel(root=root, mode="w", data_libs=data_libs, logger=logger) - fm.build(opt=_cases[case]["configuration"]) + region = gpd.GeoDataFrame.from_features(_cases[case]["region"], crs=4326) + fm.build(region={"geom": region}, opt=_cases[case]["configuration"]) fm.write() # Check if the exposure data exists @@ -53,4 +91,4 @@ def test_equity_data(case): assert root.joinpath("exposure", "equity", "equity_data.csv").exists() # Check if the vulnerability data exists - assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists() \ No newline at end of file + assert root.joinpath("vulnerability", "vulnerability_curves.csv").exists()