diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 521a1386..e3e24c2a 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -46,7 +46,7 @@ jobs: - name: Test geosnap run: | - pytest -v --color yes --cov geosnap --cov-append --cov-report term-missing --cov-report xml . + pytest -v --color yes --cov geosnap/tests --cov-append --cov-report term-missing --cov-report xml . - uses: codecov/codecov-action@v3 diff --git a/docs/api.rst b/docs/api.rst index 0c753c8a..62bbb8e3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -41,7 +41,7 @@ available quickly with no configuration by accessing methods on the class. DataStore.tracts_1990 DataStore.tracts_2000 DataStore.tracts_2010 - + DataStore.tracts_2020 Storing data ''''''''''''''' diff --git a/geosnap/_data.py b/geosnap/_data.py index 2f4863cd..f2d3c627 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -54,8 +54,7 @@ def __delitem__(self, key): class DataStore: """Storage for geosnap data. Currently supports data from several U.S. federal agencies and national research centers.""" - def __init__(self, data_dir="auto"): - self + def __init__(self, data_dir="auto", disclaimer=False): appname = "geosnap" appauthor = "geosnap" @@ -63,11 +62,12 @@ def __init__(self, data_dir="auto"): self.data_dir = user_data_dir(appname, appauthor) else: self.data_dir = data_dir - warn( - "The geosnap data storage class is provided for convenience only. The geosnap developers make no promises " - "regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. " - "The end-user is responsible for any and all analyses or applications created with the package." - ) + if disclaimer: + warn( + "The geosnap data storage class is provided for convenience only. The geosnap developers make no promises " + "regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. " + "The end-user is responsible for any and all analyses or applications created with the package." + ) def __dir__(self): @@ -75,6 +75,7 @@ def __dir__(self): "acs", "blocks_2000", "blocks_2010", + "blocks_2020", "codebook", "counties", "ejscreen", @@ -89,6 +90,7 @@ def __dir__(self): "tracts_1990", "tracts_2000", "tracts_2010", + "tracts_2020" ] return atts @@ -431,9 +433,8 @@ def tracts_2000(self, states=None): Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2000 tracts as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2000 tracts as a geodataframe """ local = pathlib.Path(self.data_dir, "tracts_2000_500k.parquet") @@ -459,9 +460,8 @@ def tracts_2010( Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2010 tracts as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2010 tracts as a geodataframe """ msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" @@ -474,6 +474,33 @@ def tracts_2010( t["year"] = 2010 return t + def tracts_2020( + self, + states=None, + ): + """Nationwide Census Tracts as drawn in 2020 (cartographic 500k). + + Parameters + ---------- + states : list-like + list of state fips to subset the national dataframe + + Returns + ------- + geopandas.GeoDataFrame + 2020 tracts as a geodataframe + + """ + msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" + local = pathlib.Path(self.data_dir, "tracts_2020_500k.parquet") + remote = "s3://spatial-ucr/census/tracts_cartographic/tracts_2020_500k.parquet" + t = _fetcher(local, remote, msg) + + if states: + t = t[t.geoid.str[:2].isin(states)] + t["year"] = 2020 + return t + def msas(self): """Metropolitan Statistical Areas as drawn in 2020. @@ -483,9 +510,8 @@ def msas(self): Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2010 MSAs as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2010 MSAs as a geodataframe """ local = pathlib.Path(self.data_dir, "msas.parquet") @@ -500,9 +526,8 @@ def states(self): Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - US States as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + US States as a geodataframe """ local = pathlib.Path(self.data_dir, "states.parquet") @@ -515,16 +540,10 @@ def states(self): def counties(self): """Nationwide counties as drawn in 2010. - Parameters - ---------- - convert : bool - if True, return geodataframe, else return dataframe (the default is True). - Returns ------- geopandas.GeoDataFrame - 2010 counties as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + 2010 counties as a geodataframe. """ local = pathlib.Path(self.data_dir, "counties.parquet") diff --git a/geosnap/tests/test_datastore.py b/geosnap/tests/test_datastore.py index d9883f1d..eb0ebc35 100644 --- a/geosnap/tests/test_datastore.py +++ b/geosnap/tests/test_datastore.py @@ -26,6 +26,10 @@ def test_tracts10(): df = datasets.tracts_2010(states=["11"]) assert df.shape == (179, 194) +def test_tracts20(): + df = datasets.tracts_2020(states=["11"]) + assert df.shape == (206, 15) + def test_counties(): assert datasets.counties().shape == (3233, 2)