From bc77f6b2df96b585e0f5d63f324a173107e86766 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 18:56:54 -0800 Subject: [PATCH 1/3] add isochrone and silhouette tests --- geosnap/tests/test_diagnostics.py | 51 +++++++++++++++++++------------ geosnap/tests/test_isochrones.py | 42 +++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 19 deletions(-) create mode 100644 geosnap/tests/test_isochrones.py diff --git a/geosnap/tests/test_diagnostics.py b/geosnap/tests/test_diagnostics.py index ede4778d..f6923d22 100644 --- a/geosnap/tests/test_diagnostics.py +++ b/geosnap/tests/test_diagnostics.py @@ -1,17 +1,17 @@ from geosnap.io import get_census from geosnap import DataStore -from geosnap.analyze import find_k, find_region_k +from geosnap.analyze import find_k, find_region_k, cluster, regionalize from numpy.testing import assert_array_equal, assert_array_almost_equal +reno = get_census(msa_fips="39900", datastore=DataStore(), years=[2010]) +columns = [ + "median_household_income", + "p_poverty_rate", + "p_unemployment_rate", +] -def test_find_k(): - reno = get_census(msa_fips="39900", datastore=DataStore(), years=[2010]) - columns = [ - "median_household_income", - "p_poverty_rate", - "p_unemployment_rate", - ] +def test_find_k(): ks = find_k( reno, columns=columns, @@ -20,17 +20,10 @@ def test_find_k(): ) # Aspatial Clusters - assert_array_almost_equal( ks.T.values[0], [2,2,2]) - -def test_find_region_k(): - - reno = get_census(msa_fips="39900", datastore=DataStore(), years=[2010]) - columns = [ - "median_household_income", - "p_poverty_rate", - "p_unemployment_rate", - ] + assert_array_almost_equal(ks.T.values[0], [2, 2, 2]) + +def test_find_region_k(): ks = find_region_k( reno, columns=columns, @@ -39,4 +32,24 @@ def test_find_region_k(): ) # Aspatial Clusters - assert_array_almost_equal( ks.values[0],[2.,2.,2.,2.,2.]) \ No newline at end of file + assert_array_almost_equal(ks.values[0], [2.0, 2.0, 2.0, 2.0, 2.0]) + + +def test_cluster_diagnostics(): + ward, ward_mod = cluster( + reno, columns=columns, method="ward", n_clusters=5, return_model=True + ) + assert ward_mod.silhouette_score.round(4) == 0.2991 + assert ward_mod.davies_bouldin_score.round(4) == 1.0336 + assert ward_mod.calinski_harabasz_score.round(4) == 88.6627 + + +def test_region_diagnostics(): + ward, ward_mod = regionalize( + reno, columns=columns, method="ward_spatial", n_clusters=5, return_model=True + ) + assert ward_mod[2010].boundary_silhouette.boundary_silhouette.mean().round(4) == 0.2076 + assert ward_mod[2010].path_silhouette.path_silhouette.mean().round(4) == -0.0801 + assert ward_mod[2010].silhouette_scores.silhouette_score.mean().round(4) ==0.063 + assert ward_mod[2010].nearest_label.nearest_label.head().tolist() == [1, 2, 0, 2, 4] + diff --git a/geosnap/tests/test_isochrones.py b/geosnap/tests/test_isochrones.py new file mode 100644 index 00000000..8900cff9 --- /dev/null +++ b/geosnap/tests/test_isochrones.py @@ -0,0 +1,42 @@ +from geosnap.analyze import ( + isochrones_from_id, + isochrones_from_gdf, +) +from geosnap.io import get_acs +from geosnap import DataStore + +import pandana as pdna +import geopandas as gpd +import os + +if not os.path.exists("41740.h5"): + import quilt3 as q3 + + b = q3.Bucket("s3://spatial-ucr") + b.fetch("osm/metro_networks_8k/41740.h5", "./41740.h5") + +datasets = DataStore() +sd_tracts = get_acs(datasets, county_fips="06073", years=[2018]) +sd_network = pdna.Network.from_hdf5("41740.h5") +example_origin = 1985327805 + + +def test_isos_from_ids(): + iso = isochrones_from_id(example_origin, sd_network, threshold=1600) + assert iso.area.round(6).tolist()[0] == 0.000128 + + +def test_isos_from_gdf(): + sd_network.nodes_df["geometry"] = gpd.points_from_xy( + sd_network.nodes_df.x, sd_network.nodes_df.y + ) + example_point = gpd.GeoDataFrame( + sd_network.nodes_df.loc[example_origin] + ).T.set_geometry("geometry") + example_point = example_point.set_crs(4326) + t = isochrones_from_gdf( + origins=example_point, + network=sd_network, + threshold=1600, + ) + assert t.area.round(8).tolist()[0] == 0.00012821 From 1b684e309364afcfa6ced9dd75137edbff388129 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 19:01:08 -0800 Subject: [PATCH 2/3] update net path --- geosnap/tests/test_isochrones.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geosnap/tests/test_isochrones.py b/geosnap/tests/test_isochrones.py index 8900cff9..15833d09 100644 --- a/geosnap/tests/test_isochrones.py +++ b/geosnap/tests/test_isochrones.py @@ -17,7 +17,7 @@ datasets = DataStore() sd_tracts = get_acs(datasets, county_fips="06073", years=[2018]) -sd_network = pdna.Network.from_hdf5("41740.h5") +sd_network = pdna.Network.from_hdf5("./41740.h5") example_origin = 1985327805 From 5e8d8fe5ef0d61daa976fad959635271f2afbcc2 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 19:07:20 -0800 Subject: [PATCH 3/3] update net path --- geosnap/tests/test_isochrones.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geosnap/tests/test_isochrones.py b/geosnap/tests/test_isochrones.py index 15833d09..a05d91a5 100644 --- a/geosnap/tests/test_isochrones.py +++ b/geosnap/tests/test_isochrones.py @@ -9,7 +9,7 @@ import geopandas as gpd import os -if not os.path.exists("41740.h5"): +if not os.path.exists("./41740.h5"): import quilt3 as q3 b = q3.Bucket("s3://spatial-ucr") @@ -17,7 +17,7 @@ datasets = DataStore() sd_tracts = get_acs(datasets, county_fips="06073", years=[2018]) -sd_network = pdna.Network.from_hdf5("./41740.h5") +sd_network = pdna.Network.from_hdf5("41740.h5") example_origin = 1985327805