Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add 2020 cartographic tracts #373

Merged
merged 2 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:

- name: Test geosnap
run: |
pytest -v --color yes --cov geosnap --cov-append --cov-report term-missing --cov-report xml .
pytest -v --color yes --cov geosnap/tests --cov-append --cov-report term-missing --cov-report xml .

- uses: codecov/codecov-action@v3

Expand Down
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ available quickly with no configuration by accessing methods on the class.
DataStore.tracts_1990
DataStore.tracts_2000
DataStore.tracts_2010

DataStore.tracts_2020

Storing data
'''''''''''''''
Expand Down
71 changes: 45 additions & 26 deletions geosnap/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,28 @@ def __delitem__(self, key):
class DataStore:
"""Storage for geosnap data. Currently supports data from several U.S. federal agencies and national research centers."""

def __init__(self, data_dir="auto"):
self
def __init__(self, data_dir="auto", disclaimer=False):
appname = "geosnap"
appauthor = "geosnap"

if data_dir == "auto":
self.data_dir = user_data_dir(appname, appauthor)
else:
self.data_dir = data_dir
warn(
"The geosnap data storage class is provided for convenience only. The geosnap developers make no promises "
"regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. "
"The end-user is responsible for any and all analyses or applications created with the package."
)
if disclaimer:
warn(
"The geosnap data storage class is provided for convenience only. The geosnap developers make no promises "
"regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. "
"The end-user is responsible for any and all analyses or applications created with the package."
)

def __dir__(self):

atts = [
"acs",
"blocks_2000",
"blocks_2010",
"blocks_2020",
"codebook",
"counties",
"ejscreen",
Expand All @@ -89,6 +90,7 @@ def __dir__(self):
"tracts_1990",
"tracts_2000",
"tracts_2010",
"tracts_2020"
]

return atts
Expand Down Expand Up @@ -431,9 +433,8 @@ def tracts_2000(self, states=None):

Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
2000 tracts as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
2000 tracts as a geodataframe

"""
local = pathlib.Path(self.data_dir, "tracts_2000_500k.parquet")
Expand All @@ -459,9 +460,8 @@ def tracts_2010(

Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
2010 tracts as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
2010 tracts as a geodataframe

"""
msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance"
Expand All @@ -474,6 +474,33 @@ def tracts_2010(
t["year"] = 2010
return t

def tracts_2020(
    self,
    states=None,
):
    """Nationwide Census Tracts as drawn in 2020 (cartographic 500k).

    The table is obtained through `_fetcher` from the local data directory
    (``tracts_2020_500k.parquet``) or from the remote S3 location.

    Parameters
    ----------
    states : str or list-like, optional
        state fips code(s) used to subset the national dataframe. A tract is
        kept when the first two characters of its ``geoid`` match one of the
        codes. A single code may be given as a plain string. If None or
        empty (the default is None), all tracts are returned.

    Returns
    -------
    geopandas.GeoDataFrame
        2020 tracts as a geodataframe, with a ``year`` column set to 2020

    """
    msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance"
    local = pathlib.Path(self.data_dir, "tracts_2020_500k.parquet")
    remote = "s3://spatial-ucr/census/tracts_cartographic/tracts_2020_500k.parquet"
    t = _fetcher(local, remote, msg)

    # Normalise `states` to a plain list before testing it: the truth value of
    # a multi-element numpy array / pandas Series is ambiguous and raises
    # ValueError, and a bare string is not accepted by `isin`.
    if isinstance(states, str):
        states = [states]
    elif states is not None:
        states = list(states)

    if states:
        # copy so the `year` assignment below writes to an independent frame
        # rather than to a slice of the national table
        t = t[t.geoid.str[:2].isin(states)].copy()
    t["year"] = 2020
    return t

def msas(self):
"""Metropolitan Statistical Areas as drawn in 2020.

Expand All @@ -483,9 +510,8 @@ def msas(self):

Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
2010 MSAs as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
2010 MSAs as a geodataframe

"""
local = pathlib.Path(self.data_dir, "msas.parquet")
Expand All @@ -500,9 +526,8 @@ def states(self):

Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
US States as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
US States as a geodataframe

"""
local = pathlib.Path(self.data_dir, "states.parquet")
Expand All @@ -515,16 +540,10 @@ def states(self):
def counties(self):
"""Nationwide counties as drawn in 2010.

Parameters
----------
convert : bool
if True, return geodataframe, else return dataframe (the default is True).

Returns
-------
geopandas.GeoDataFrame
2010 counties as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
2010 counties as a geodataframe.

"""
local = pathlib.Path(self.data_dir, "counties.parquet")
Expand Down
4 changes: 4 additions & 0 deletions geosnap/tests/test_datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def test_tracts10():
df = datasets.tracts_2010(states=["11"])
assert df.shape == (179, 194)

def test_tracts20():
    """Subsetting 2020 tracts to state fips "11" yields a 206 x 15 table."""
    subset = datasets.tracts_2020(states=["11"])
    expected_shape = (206, 15)
    assert subset.shape == expected_shape


def test_counties():
assert datasets.counties().shape == (3233, 2)
Expand Down
Loading