From 1d0169e5a19503ada73c38193773f8f882a4eb5a Mon Sep 17 00:00:00 2001 From: eli knaap Date: Tue, 16 Jan 2024 14:12:00 -0800 Subject: [PATCH 1/2] seda url, animation basemap, and harmonize to gdf --- geosnap/_data.py | 6 +++++- geosnap/harmonize/harmonize.py | 11 ++++++----- geosnap/visualize/mapping.py | 3 +++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/geosnap/_data.py b/geosnap/_data.py index ac499a60..94b787b8 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -193,7 +193,11 @@ def seda( "gcs", "cs", ], "`standardize` argument must be either 'cs' for cohort-standardized or 'gcs' for grade-cohort-standardized" - fn = f"seda_{level}_{pooling}_{standardize}_4.1" + if pooling=='poolsub': + fn = f"seda_{level}_{pooling}_{standardize}_4.1_corrected" + else: + fn = f"seda_{level}_{pooling}_{standardize}_4.1" + local_path = pathlib.Path(self.data_dir, "seda", f"{fn}.parquet") remote_path = f"https://stacks.stanford.edu/file/druid:xv742vh9296/{fn}.csv" msg = ( diff --git a/geosnap/harmonize/harmonize.py b/geosnap/harmonize/harmonize.py index 09931ad6..17a2f55a 100644 --- a/geosnap/harmonize/harmonize.py +++ b/geosnap/harmonize/harmonize.py @@ -156,7 +156,7 @@ def harmonize( if intensive_variables is not None: for i in intensive_variables: allcols.append(i) - + with tqdm(total=len(times), desc=f"Converting {len(times)} time periods") as pbar: for i in times: pbar.set_description(f"Harmonizing {i}") @@ -172,10 +172,10 @@ def harmonize( allocate_total=allocate_total, ) else: - # if there are NaNs, tobler will raise lots of warnings, that it's filling - # with implicit 0s. Those warnings are superfluous most of the time + # if there are NaNs, tobler will raise lots of warnings, that it's filling + # with implicit 0s. Those warnings are superfluous most of the time with warnings.catch_warnings(): - warnings.simplefilter("ignore") + warnings.simplefilter("ignore") interpolation = area_interpolate( source_df, target_df.copy(), @@ -225,7 +225,8 @@ def harmonize( pbar.update(1) pbar.set_description("Complete") pbar.close() - interpolated_dfs.append(target_df[allcols].set_index(unit_index)) + if target_year is not None: + interpolated_dfs.append(target_df[allcols].set_index(unit_index)) harmonized_df = gpd.GeoDataFrame(pd.concat(interpolated_dfs), crs=crs) diff --git a/geosnap/visualize/mapping.py b/geosnap/visualize/mapping.py index 9c181d99..f9cc56cd 100644 --- a/geosnap/visualize/mapping.py +++ b/geosnap/visualize/mapping.py @@ -266,6 +266,9 @@ def plot_timeseries( axs.format(suptitle=column) else: axs.format(suptitle=title) + else: + if title: + plt.suptitle(title) if save_fig: f.savefig(save_fig, dpi=dpi, bbox_inches="tight") From f8057c741e5a88b14f187e9546093a978115a682 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Tue, 16 Jan 2024 14:40:17 -0800 Subject: [PATCH 2/2] add test for harmonize target_gdf --- geosnap/tests/test_harmonize.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/geosnap/tests/test_harmonize.py b/geosnap/tests/test_harmonize.py index cc8c89a7..237b8830 100644 --- a/geosnap/tests/test_harmonize.py +++ b/geosnap/tests/test_harmonize.py @@ -7,12 +7,6 @@ from geosnap.harmonize import harmonize from geosnap.io import get_census -local_raster = os.path.join(os.getcwd(), "nlcd_2011.tif") # portability -if not os.path.exists(local_raster): - p = quilt3.Package.browse("rasters/nlcd", "s3://spatial-ucr") - p["nlcd_2011.tif"].fetch() - - def test_harmonize_area(): la = get_census(county_fips="06037", datastore=DataStore()) @@ -49,7 +43,7 @@ def test_harmonize_area_weighted(): extensive_variables=["n_total_housing_units"], intensive_variables=["p_vacant_housing_units"], weights_method="dasymetric", - raster=local_raster, + raster='https://spatial-ucr.s3.amazonaws.com/nlcd/landcover/nlcd_landcover_2011.tif', ) assert harmonized_nlcd_weighted.n_total_housing_units.sum().round(0) == 900620.0 assert_allclose( @@ -57,3 +51,19 @@ def test_harmonize_area_weighted(): 8832.8796, rtol=1e-03, ) + +def test_harmonize_target_gdf(): + + balt = get_census(county_fips="24510", datastore=DataStore()) + tgt_gdf = balt[balt.year==2000][['geometry']] + gdf = harmonize(balt, + target_gdf=tgt_gdf, + extensive_variables=["n_total_housing_units"], + intensive_variables=["p_vacant_housing_units"], + ) + assert gdf.n_total_housing_units.sum().round(0) == 900620.0 + assert_allclose( + gdf.p_vacant_housing_units.sum(), + 8832.8796, + rtol=1e-03, + )