Merge pull request #390 from oturns/sedafix
add tests for nces and blocks, docstring for bea
knaaptime authored Jan 18, 2024
2 parents 4c8f016 + a376728 commit e79bb6b
Showing 7 changed files with 105 additions and 149 deletions.
2 changes: 1 addition & 1 deletion .ci/39.yml → .ci/312.yml
@@ -2,7 +2,7 @@ name: test
channels:
- conda-forge
dependencies:
- python =3.9
- python =3.12
- pandas
- giddy >=2.2.1
- libpysal
6 changes: 3 additions & 3 deletions .github/workflows/unittests.yml
@@ -18,13 +18,13 @@ jobs:
matrix:
os: [ubuntu-latest]
environment-file:
- .ci/39.yml
- .ci/310.yml
- .ci/311.yml
- .ci/312.yml
include:
- environment-file: .ci/310.yml
- environment-file: .ci/311.yml
os: macos-latest
- environment-file: .ci/310.yml
- environment-file: .ci/311.yml
os: windows-latest
defaults:
run:
1 change: 1 addition & 0 deletions docs/api.rst
@@ -25,6 +25,7 @@ available quickly with no configuration by accessing methods on the class.

DataStore
DataStore.acs
DataStore.bea_regions
DataStore.blocks_2000
DataStore.blocks_2010
DataStore.blocks_2020
52 changes: 28 additions & 24 deletions geosnap/_data.py
@@ -70,9 +70,9 @@ def __init__(self, data_dir="auto", disclaimer=False):
)

def __dir__(self):

atts = [
"acs",
"bea_regions",
"blocks_2000",
"blocks_2010",
"blocks_2020",
@@ -90,7 +90,7 @@ def __dir__(self):
"tracts_1990",
"tracts_2000",
"tracts_2010",
"tracts_2020"
"tracts_2020",
]

return atts
@@ -107,6 +107,21 @@ def show_data_dir(self, verbose=True):
print(self.data_dir)
return self.data_dir

def bea_regions(self):
"""Table that maps states to their respective BEA regions
Returns
-------
pandas.DataFrame
BEA region table
"""
return pd.read_csv(
os.path.join(
os.path.dirname(os.path.abspath(__file__)), "io/bea_regions.csv"
),
converters={'stfips':str}
)

def acs(self, year=2018, level="tract", states=None):
"""American Community Survey Data (5-year estimates).
@@ -187,7 +202,7 @@ def seda(
assert pooling in [
"pool",
"long",
"poolsub"
"poolsub",
], "`pool` argument must be either 'pool', 'long', or 'poolsub'"
assert standardize in [
"gcs",
@@ -197,7 +212,6 @@ def seda(
fn = f"seda_{level}_{pooling}_{standardize}_4.1_corrected"
else:
fn = f"seda_{level}_{pooling}_{standardize}_4.1"

local_path = pathlib.Path(self.data_dir, "seda", f"{fn}.parquet")
remote_path = f"https://stacks.stanford.edu/file/druid:xv742vh9296/{fn}.csv"
msg = (
@@ -216,8 +230,8 @@ def seda(
remote_path, converters={"sedasch": str, "fips": str}
)
t.sedasch = t.sedasch.str.rjust(12, "0")
except FileNotFoundError:
raise FileNotFoundError(
except FileNotFoundError as e:
raise FileNotFoundError(
"Unable to access local or remote SEDA data"
) from e
elif level == "geodist":
@@ -226,8 +240,8 @@ def seda(
remote_path, converters={"sedalea": str, "fips": str}
)
t.sedalea = t.sedalea.str.rjust(7, "0")
except FileNotFoundError:
raise FileNotFoundError(
except FileNotFoundError as e:
raise FileNotFoundError(
"Unable to access local or remote SEDA data"
) from e
t.fips = t.fips.str.rjust(2, "0")
@@ -250,10 +264,7 @@ def nces(self, year=1516, dataset="sabs"):
geopandas.GeoDataFrame
geodataframe of NCES data
"""
if dataset == "school_districts":
selector = "districts"
else:
selector = dataset
selector = "districts" if dataset == "school_districts" else dataset
local_path = pathlib.Path(self.data_dir, "nces", f"{dataset}_{year}.parquet")
remote_path = f"s3://spatial-ucr/nces/{selector}/{dataset}_{year}.parquet"
msg = "Streaming data from S3. Use `geosnap.io.store_nces()` to store the data locally for better performance"
@@ -569,16 +580,11 @@ def msa_definitions(self):
dataframe that stores state/county --> MSA crosswalk definitions.
"""
local = pathlib.Path(self.data_dir, "msa_definitions.parquet")
remote = "s3://spatial-ucr/census/administrative/msa_definitions.parquet"
msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance"
try:
t = pd.read_parquet(local)
except FileNotFoundError:
warn(msg)
t = pd.read_parquet(remote, storage_options={"anon": True})

return t
return pd.read_csv(
os.path.join(
os.path.dirname(os.path.abspath(__file__)), "io/msa_definitions.csv"
)
)

def ltdb(self):
"""Longitudinal Tract Database (LTDB).
@@ -626,5 +632,3 @@ def codebook(self):
return pd.read_csv(
os.path.join(os.path.dirname(os.path.abspath(__file__)), "io/variables.csv")
)
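
A minimal sketch of the new `bea_regions` accessor added above. The expected shape matches the `test_bea` assertion later in this diff; the import path assumes `DataStore` is exported at the package root:

```python
from geosnap import DataStore

datasets = DataStore()

# Crosswalk of states to BEA regions, read from the bundled CSV with
# stfips preserved as zero-padded strings.
bea = datasets.bea_regions()
print(bea.shape)    # (51, 4): columns geoid, name, stfips, state
print(bea.head(2))
```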


163 changes: 52 additions & 111 deletions geosnap/io/bea_regions.csv
@@ -1,111 +1,52 @@
,geoid,metro,state,bea_region
0,10420,Akron,OH,Great Lakes
1,10580,Albany-Schenectady-Troy,NY,Mideast
2,10740,Albuquerque,NM,Southwest
3,10900,Allentown-Bethlehem-Easton,PA-NJ,Mideast
4,12060,Atlanta-Sandy Springs-Alpharetta,GA,Southeast
5,12260,Augusta-Richmond County,GA-SC,Southeast
6,12420,Austin-Round Rock-Georgetown,TX,Southwest
7,12540,Bakersfield,CA,Far West
8,12580,Baltimore-Columbia-Towson,MD,Mideast
9,12940,Baton Rouge,LA,Southeast
10,13820,Birmingham-Hoover,AL,Southeast
11,14260,Boise City,ID,Rocky Mountain
12,14460,Boston-Cambridge-Newton,MA-NH,New England
13,14860,Bridgeport-Stamford-Norwalk,CT,New England
14,15380,Buffalo-Cheektowaga,NY,Mideast
15,15980,Cape Coral-Fort Myers,FL,Southeast
16,16700,Charleston-North Charleston,SC,Southeast
17,16740,Charlotte-Concord-Gastonia,NC-SC,Southeast
18,16860,Chattanooga,TN-GA,Southeast
19,16980,Chicago-Naperville-Elgin,IL-IN-WI,Great Lakes
20,17140,Cincinnati,OH-KY-IN,Great Lakes
21,17460,Cleveland-Elyria,OH,Great Lakes
22,17820,Colorado Springs,CO,Rocky Mountain
23,17900,Columbia,SC,Southeast
24,18140,Columbus,OH,Great Lakes
25,19100,Dallas-Fort Worth-Arlington,TX,Southwest
26,19430,Dayton-Kettering,OH,Great Lakes
27,19660,Deltona-Daytona Beach-Ormond Beach,FL,Southeast
28,19740,Denver-Aurora-Lakewood,CO,Rocky Mountain
29,19780,Des Moines-West Des Moines,IA,Plains
30,19820,Detroit-Warren-Dearborn,MI,Great Lakes
31,20500,Durham-Chapel Hill,NC,Southeast
32,21340,El Paso,TX,Southwest
33,22180,Fayetteville,NC,Southeast
34,22220,Fayetteville-Springdale-Rogers,AR,Southeast
35,23420,Fresno,CA,Far West
36,24340,Grand Rapids-Kentwood,MI,Great Lakes
37,24660,Greensboro-High Point,NC,Southeast
38,24860,Greenville-Anderson,SC,Southeast
39,25420,Harrisburg-Carlisle,PA,Mideast
40,25540,Hartford-East Hartford-Middletown,CT,New England
41,26420,Houston-The Woodlands-Sugar Land,TX,Southwest
42,26900,Indianapolis-Carmel-Anderson,IN,Great Lakes
43,27140,Jackson,MS,Southeast
44,27260,Jacksonville,FL,Southeast
45,28140,Kansas City,MO-KS,Plains
46,28940,Knoxville,TN,Southeast
47,29460,Lakeland-Winter Haven,FL,Southeast
48,29540,Lancaster,PA,Mideast
49,29620,Lansing-East Lansing,MI,Great Lakes
50,29820,Las Vegas-Henderson-Paradise,NV,Far West
51,30460,Lexington-Fayette,KY,Southeast
52,30780,Little Rock-North Little Rock-Conway,AR,Southeast
53,31080,Los Angeles-Long Beach-Anaheim,CA,Far West
54,31140,Louisville/Jefferson County,KY-IN,Southeast
55,31540,Madison,WI,Great Lakes
56,32580,McAllen-Edinburg-Mission,TX,Southwest
57,32820,Memphis,TN-MS-AR,Southeast
58,33100,Miami-Fort Lauderdale-Pompano Beach,FL,Southeast
59,33340,Milwaukee-Waukesha,WI,Great Lakes
60,33460,Minneapolis-St. Paul-Bloomington,MN-WI,Plains
61,33700,Modesto,CA,Far West
62,34980,Nashville-Davidson--Murfreesboro--Franklin,TN,Southeast
63,35300,New Haven-Milford,CT,New England
64,35380,New Orleans-Metairie,LA,Southeast
65,35620,New York-Newark-Jersey City,NY-NJ-PA,Mideast
66,35840,North Port-Sarasota-Bradenton,FL,Southeast
67,36260,Ogden-Clearfield,UT,Rocky Mountain
68,36420,Oklahoma City,OK,Southwest
69,36540,Omaha-Council Bluffs,NE-IA,Plains
70,36740,Orlando-Kissimmee-Sanford,FL,Southeast
71,37100,Oxnard-Thousand Oaks-Ventura,CA,Far West
72,37340,Palm Bay-Melbourne-Titusville,FL,Southeast
73,37980,Philadelphia-Camden-Wilmington,PA-NJ-DE-MD,Mideast
74,38060,Phoenix-Mesa-Chandler,AZ,Southwest
75,38300,Pittsburgh,PA,Mideast
76,38860,Portland-South Portland,ME,New England
77,38900,Portland-Vancouver-Hillsboro,OR-WA,Far West
78,39100,Poughkeepsie-Newburgh-Middletown,NY,Mideast
79,39300,Providence-Warwick,RI-MA,New England
80,39340,Provo-Orem,UT,Rocky Mountain
81,39580,Raleigh-Cary,NC,Southeast
82,40060,Richmond,VA,Southeast
83,40140,Riverside-San Bernardino-Ontario,CA,Far West
84,40380,Rochester,NY,Mideast
85,40900,Sacramento-Roseville-Folsom,CA,Far West
86,41620,Salt Lake City,UT,Rocky Mountain
87,41700,San Antonio-New Braunfels,TX,Southwest
88,41740,San Diego-Chula Vista-Carlsbad,CA,Far West
89,41860,San Francisco-Oakland-Berkeley,CA,Far West
90,41940,San Jose-Sunnyvale-Santa Clara,CA,Far West
91,42220,Santa Rosa-Petaluma,CA,Far West
92,42540,Scranton--Wilkes-Barre,PA,Mideast
93,42660,Seattle-Tacoma-Bellevue,WA,Far West
94,44060,Spokane-Spokane Valley,WA,Far West
95,44140,Springfield,MA,New England
96,41180,St. Louis,MO-IL,Plains
97,44700,Stockton,CA,Far West
98,45060,Syracuse,NY,Mideast
99,45300,Tampa-St. Petersburg-Clearwater,FL,Southeast
100,45780,Toledo,OH,Great Lakes
101,46060,Tucson,AZ,Southwest
102,46140,Tulsa,OK,Southwest
103,46520,Urban Honolulu,HI,Far West
104,47260,Virginia Beach-Norfolk-Newport News,VA-NC,Southeast
105,47900,Washington-Arlington-Alexandria,DC-VA-MD-WV,Mideast
106,48620,Wichita,KS,Plains
107,49180,Winston-Salem,NC,Southeast
108,49340,Worcester,MA-CT,New England
109,49660,Youngstown-Warren-Boardman,OH-PA,Great Lakes
geoid,name,stfips,state
91000,"New England",09000,"Connecticut"
91000,"New England",23000,"Maine"
91000,"New England",25000,"Massachusetts"
91000,"New England",33000,"New Hampshire"
91000,"New England",44000,"Rhode Island"
91000,"New England",50000,"Vermont"
92000,"Mideast",10000,"Delaware"
92000,"Mideast",11000,"District of Columbia"
92000,"Mideast",24000,"Maryland"
92000,"Mideast",34000,"New Jersey"
92000,"Mideast",36000,"New York"
92000,"Mideast",42000,"Pennsylvania"
93000,"Great Lakes",17000,"Illinois"
93000,"Great Lakes",18000,"Indiana"
93000,"Great Lakes",26000,"Michigan"
93000,"Great Lakes",39000,"Ohio"
93000,"Great Lakes",55000,"Wisconsin"
94000,"Plains",19000,"Iowa"
94000,"Plains",20000,"Kansas"
94000,"Plains",27000,"Minnesota"
94000,"Plains",29000,"Missouri"
94000,"Plains",31000,"Nebraska"
94000,"Plains",38000,"North Dakota"
94000,"Plains",46000,"South Dakota"
95000,"Southeast",01000,"Alabama"
95000,"Southeast",05000,"Arkansas"
95000,"Southeast",12000,"Florida"
95000,"Southeast",13000,"Georgia"
95000,"Southeast",21000,"Kentucky"
95000,"Southeast",22000,"Louisiana"
95000,"Southeast",28000,"Mississippi"
95000,"Southeast",37000,"North Carolina"
95000,"Southeast",45000,"South Carolina"
95000,"Southeast",47000,"Tennessee"
95000,"Southeast",51000,"Virginia"
95000,"Southeast",54000,"West Virginia"
96000,"Southwest",04000,"Arizona"
96000,"Southwest",35000,"New Mexico"
96000,"Southwest",40000,"Oklahoma"
96000,"Southwest",48000,"Texas"
97000,"Rocky Mountain",08000,"Colorado"
97000,"Rocky Mountain",16000,"Idaho"
97000,"Rocky Mountain",30000,"Montana"
97000,"Rocky Mountain",49000,"Utah"
97000,"Rocky Mountain",56000,"Wyoming"
98000,"Far West",02000,"Alaska"
98000,"Far West",06000,"California"
98000,"Far West",15000,"Hawaii"
98000,"Far West",32000,"Nevada"
98000,"Far West",41000,"Oregon"
98000,"Far West",53000,"Washington"
5 changes: 4 additions & 1 deletion geosnap/tests/test_constructors.py
@@ -35,10 +35,13 @@ def test_nces_sabs():
assert sabs.shape == (75128, 15)


def test_acs():
def test_acs_tract():
acs = io.get_acs(store, fips="11", years=[2018], level="tract")
assert acs.shape == (179, 157)

def test_acs_blockgroup():
acs = io.get_acs(store, fips="11", years=[2018], level="bg")
assert acs.shape == (450, 38)

@pytest.mark.skipif(not LTDB, reason="unable to locate LTDB data")
def test_ltdb_from_boundary():
25 changes: 16 additions & 9 deletions geosnap/tests/test_datastore.py
@@ -6,22 +6,18 @@ def test_data_dir():
loc = datasets.show_data_dir()
assert len(loc) > 5


def test_acs():
df = datasets.acs(year=2012, states=["11"])
assert df.shape == (179, 104)


def test_tracts90():
df = datasets.tracts_1990(states=["11"])
assert df.shape == (192, 164)


def test_tracts00():
df = datasets.tracts_2000(states=["11"])
assert df.shape == (188, 192)


def test_tracts10():
df = datasets.tracts_2010(states=["11"])
assert df.shape == (179, 194)
@@ -30,25 +26,36 @@ def test_tracts20():
df = datasets.tracts_2020(states=["11"])
assert df.shape == (206, 15)


def test_counties():
assert datasets.counties().shape == (3233, 2)


def test_states():
assert datasets.states().shape == (51, 3)


def test_msas():
df = datasets.msas()
assert df.shape == (939, 4)


def test_msa_defs():
df = datasets.msa_definitions()
assert df.shape == (1916, 13)


def test_codebook():
df = datasets.codebook()
assert df.shape == (194, 12)

def test_bea():
df = datasets.bea_regions()
assert df.shape == (51, 4)

def test_blocks_2000():
df = datasets.blocks_2000(states=['11'])
assert df.shape == (5674, 3)

def test_blocks_2010():
df = datasets.blocks_2010(states=['11'])
assert df.shape == (6507, 5)

def test_blocks_2020():
df = datasets.blocks_2020(states=['11'])
assert df.shape == (6012, 7)
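
The tests added in this commit can be selected with pytest's `-k` expression. A sketch, assuming it is run from the repository root and that the blocks data is either cached locally or reachable over the network:

```python
import pytest

# Run only the new BEA and blocks DataStore tests.
pytest.main(["geosnap/tests/test_datastore.py", "-k", "bea or blocks"])
```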
