Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datasets #205

Merged
merged 25 commits into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d6d4ed6
fix pop variable for acs
knaaptime Feb 18, 2020
ab3d741
cache intermediate files from cenpy
knaaptime Feb 18, 2020
97c40e6
add skip argument
knaaptime Feb 18, 2020
1a9afd3
state typo
knaaptime Feb 18, 2020
9605645
cache dir path
knaaptime Feb 18, 2020
7ce407d
use pathlib and forego geoms
knaaptime Feb 18, 2020
27b436c
different pop variable
knaaptime Feb 18, 2020
4b60124
import datasets inside the fetch_data function
knaaptime Feb 21, 2020
978ab1f
make cenpy_fetch utils private
knaaptime Feb 21, 2020
0417dfa
missing underscore
knaaptime Feb 21, 2020
2a80fab
fix unemployed formula
knaaptime Feb 22, 2020
3a289eb
allow specifying storage path and use local raster for harmonize test
knaaptime Feb 23, 2020
8ac525b
missing imports in harmonize test
knaaptime Feb 23, 2020
592fbc4
try passing on cenpy download differently
knaaptime Feb 23, 2020
6d037b4
try except downloades differently
knaaptime Feb 23, 2020
41f37ef
wrong indentation in acs fetcher
knaaptime Feb 23, 2020
0dec6a6
acs fetcher
knaaptime Feb 23, 2020
d7b366d
break variable calculation into separate func
knaaptime Feb 24, 2020
d546415
missing return
knaaptime Feb 24, 2020
eeac91e
missing variable in process_acs
knaaptime Feb 24, 2020
45104e8
missing _ in process_acs
knaaptime Feb 24, 2020
fdecd79
use new quilt bucket
knaaptime Mar 4, 2020
4debf13
Merge branch 'master' into datasets
knaaptime Mar 6, 2020
be30efc
pin to quilt 3.1.11
knaaptime Mar 11, 2020
0e3481b
Merge branch 'datasets' of github.com:knaaptime/geosnap into datasets
knaaptime Mar 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ install:

script:
- if [[ $TRAVIS_JOB_NAME == python-* ]]; then
python -c "from tobler.data import store_rasters; store_rasters()";
python -c "import os;from geosnap import io;path = os.environ['DLPATH'];io.store_ltdb(sample=path + '/ltdb_sample.zip', fullcount=path + '/ltdb_full.zip')";
travis_wait 45 pytest --ignore=examples/ --ignore=geosnap/util/ --ignore=geosnap/visualize/ --cov geosnap;
jupyter nbconvert --to notebook --execute --inplace --ExecutePreprocessor.timeout=-1 --ExecutePreprocessor.kernel_name=python3 examples/*.ipynb;
Expand Down
8 changes: 4 additions & 4 deletions docs/_modules/geosnap/_data.html
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,10 @@ <h1>Source code for geosnap._data</h1><div class="highlight"><pre>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">tracts_cartographic</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>
<span class="n">administrative</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>

<span class="k">except</span> <span class="n">Timeout</span><span class="p">:</span>
Expand Down Expand Up @@ -304,7 +304,7 @@ <h1>Source code for geosnap._data</h1><div class="highlight"><pre>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">blocks_2000</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>

<span class="k">except</span> <span class="n">Timeout</span><span class="p">:</span>
Expand Down Expand Up @@ -359,7 +359,7 @@ <h1>Source code for geosnap._data</h1><div class="highlight"><pre>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">blocks_2010</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>

<span class="k">except</span> <span class="n">Timeout</span><span class="p">:</span>
Expand Down
8 changes: 4 additions & 4 deletions docs/_modules/geosnap/io/storage.html
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ <h1>Source code for geosnap.io.storage</h1><div class="highlight"><pre>
<span class="sd"> is 3.05 GB.</span>

<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span></div>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="store_blocks_2000"><a class="viewcode-back" href="../../../generated/geosnap.io.store_blocks_2000.html#geosnap.io.store_blocks_2000">[docs]</a><span class="k">def</span> <span class="nf">store_blocks_2000</span><span class="p">():</span>
Expand All @@ -173,7 +173,7 @@ <h1>Source code for geosnap.io.storage</h1><div class="highlight"><pre>
<span class="sd"> in place of streaming data for all census queries.</span>

<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span></div>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="store_blocks_2010"><a class="viewcode-back" href="../../../generated/geosnap.io.store_blocks_2010.html#geosnap.io.store_blocks_2010">[docs]</a><span class="k">def</span> <span class="nf">store_blocks_2010</span><span class="p">():</span>
Expand All @@ -186,7 +186,7 @@ <h1>Source code for geosnap.io.storage</h1><div class="highlight"><pre>
<span class="sd"> in place of streaming data for all census queries.</span>

<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span></div>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="store_ltdb"><a class="viewcode-back" href="../../../generated/geosnap.io.store_ltdb.html#geosnap.io.store_ltdb">[docs]</a><span class="k">def</span> <span class="nf">store_ltdb</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">fullcount</span><span class="p">):</span>
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- palettable
- pip
- tqdm
- quilt3 ==3.1.8
- quilt3 >=3.1.11
- xlrd
- region >=0.2.0
- tobler
2 changes: 1 addition & 1 deletion examples/01_getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"\n",
"geosnap works with data from anywhere in the world, but comes batteries-included with three decades of national US Census data, including boundaries for metropolitan statistical areas, states, counties, and tracts, and over 100 commonly used demographic and socioeconomic variables at the census-tract level. All of these data are stored as geopandas geodataframes in efficient [apache parquet](https://parquet.apache.org/) files and distributed through [quilt](https://quiltdata.com/). \n",
"\n",
"These data are available when you first import geosnap by streaming from our [quilt bucket](https://spatialucr.quiltdata.com/b/quilt-cgs) into memory. That can be useful if you dont need US data or if you just want to kick the tires, but it also means you need an internet connection to work with census data, and things may slow down depending on your network performance. For that reason, you can also use the `store_census` function to cache the data on your local machine for faster querying. This will only take around 400mb of disk space, speed up data operations, and remove the need for an internet connection."
"These data are available when you first import geosnap by streaming from our [quilt bucket](https://spatialucr.quiltdata.com/b/spatial-ucr) into memory. That can be useful if you don't need US data or if you just want to kick the tires, but it also means you need an internet connection to work with census data, and things may slow down depending on your network performance. For that reason, you can also use the `store_census` function to cache the data on your local machine for faster querying. This will only take around 400 MB of disk space, speed up data operations, and remove the need for an internet connection."
]
},
{
Expand Down
8 changes: 4 additions & 4 deletions geosnap/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ def __init__(self):
)
try:
tracts_cartographic = quilt3.Package.browse(
"census/tracts_cartographic", "s3://quilt-cgs"
"census/tracts_cartographic", "s3://spatial-ucr"
)
administrative = quilt3.Package.browse(
"census/administrative", "s3://quilt-cgs"
"census/administrative", "s3://spatial-ucr"
)

except Timeout:
Expand Down Expand Up @@ -188,7 +188,7 @@ def blocks_2000(self, states=None, convert=True, fips=None):
)
try:
blocks_2000 = quilt3.Package.browse(
"census/blocks_2000", "s3://quilt-cgs"
"census/blocks_2000", "s3://spatial-ucr"
)

except Timeout:
Expand Down Expand Up @@ -243,7 +243,7 @@ def blocks_2010(self, states=None, convert=True, fips=None):
)
try:
blocks_2010 = quilt3.Package.browse(
"census/blocks_2010", "s3://quilt-cgs"
"census/blocks_2010", "s3://spatial-ucr"
)

except Timeout:
Expand Down
14 changes: 7 additions & 7 deletions geosnap/io/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
storage = quilt3.Package()


def store_census():
def store_census(dest=None):
"""Save census data to the local quilt package storage.

Returns
Expand All @@ -43,11 +43,11 @@ def store_census():
is 3.05 GB.

"""
quilt3.Package.install("census/tracts_cartographic", "s3://quilt-cgs")
quilt3.Package.install("census/administrative", "s3://quilt-cgs")
quilt3.Package.install("census/tracts_cartographic", "s3://spatial-ucr", dest=dest)
quilt3.Package.install("census/administrative", "s3://spatial-ucr", dest=dest)


def store_blocks_2000():
def store_blocks_2000(dest=None):
"""Save census 2000 census block data to the local quilt package storage.

Returns
Expand All @@ -57,10 +57,10 @@ def store_blocks_2000():
in place of streaming data for all census queries.

"""
quilt3.Package.install("census/blocks_2000", "s3://quilt-cgs")
quilt3.Package.install("census/blocks_2000", "s3://spatial-ucr", dest=dest)


def store_blocks_2010():
def store_blocks_2010(dest=None):
"""Save census 2010 census block data to the local quilt package storage.

Returns
Expand All @@ -70,7 +70,7 @@ def store_blocks_2010():
in place of streaming data for all census queries.

"""
quilt3.Package.install("census/blocks_2010", "s3://quilt-cgs")
quilt3.Package.install("census/blocks_2010", "s3://spatial-ucr", dest=dest)


def store_ltdb(sample, fullcount):
Expand Down
4 changes: 2 additions & 2 deletions geosnap/io/variables.csv
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ p_asian_over_65,percentage of 65 years and older of Asians and Pacific Islanders
n_female_over_16,"females 16 years and over, except in armed forces",,dflabf,DCFEPR,SF3,P0700006+P0700007+P0700008,SF3,P043012,,Socioeconomic Status,
n_female_labor_force,females in labor force,,flabf,FEPR,SF3,P0700006+P0700007,SF3,P043010,,Socioeconomic Status,
n_labor_force,civilian labor force,,clf,,SF3,P0700002+P0700003+P0700006+P0700007,SF3,P043005+P043012,B27011_002E,Socioeconomic Status,
n_unemployed_persons,unemployed persons,,unemp,,SF3,P0700003+P0700007,SF3,P043007+P043014,C24010_001E-(B23001_007E+B23001_014E+B23001_021E+B23001_028E+B23001_035E+B23001_042E+B23001_049E+B23001_049E+B23001_056E+B23001_063E+B23001_070E+B23001_093E+B23001_100E+B23001_107E+B23001_114E+B23001_121E+B23001_128E+B23001_135E+B23001_142E+B23001_149E+B23001_156E),Socioeconomic Status,
n_unemployed_persons,unemployed persons,,unemp,,SF3,P0700003+P0700007,SF3,P043007+P043014,B23001_008E+B23001_015E+B23001_022E+B23001_029E+B23001_036E+B23001_043E+B23001_050E+B23001_057E+B23001_064E+B23001_071E+B23001_094E+B23001_101E+B23001_108E+B23001_115E+B23001_122E+B23001_129E+B23001_136E+B23001_143E+B23001_150E+B23001_157E,Socioeconomic Status,
n_employed_over_16,employed persons 16 years and over,,empclf,EMPMT,SF3,P0700002+P0700006,SF3,P049001,B23001_007E+B23001_014E+B23001_021E+B23001_028E+B23001_035E+B23001_042E+B23001_049E+B23001_056E+B23001_063E+B23001_070E+B23001_093E+B23001_100E+B23001_107E+B23001_114E+B23001_121E+B23001_128E+B23001_135E+B23001_142E+B23001_149E+B23001_156E,Socioeconomic Status,
n_employed_professional,professional employees (by occupations),,prof,DLFRAT,SF3,P0780001+P0780002,SF3,P049017+P049044,,Socioeconomic Status,
n_employed_manufacturing,manufacturing employees (by industries),,manuf,PRFEMP,SF3,P0770004+P0770005,SF3,P049007+P049034,,Socioeconomic Status,
Expand Down Expand Up @@ -192,4 +192,4 @@ p_poverty_rate_black,percentage of blacks in poverty,p_poverty_rate_black=n_pove
p_poverty_rate_hispanic,percentage of Hispanics in poverty,p_poverty_rate_hispanic=n_poverty_hispanic / n_poverty_determined_persons*100,phpov,,,,,,,Socioeconomic Status,
p_poverty_rate_native,percentage of Native Americans in poverty,p_poverty_rate_native=n_poverty_native / n_poverty_determined_persons*100,pnapov,,,,,,,Socioeconomic Status,
p_poverty_rate_asian,percentage of Asian and Pacific Islanders in poverty,p_poverty_rate_asian=n_poverty_asian / n_poverty_determined_persons*100,papov,RASPR,,,,,,Socioeconomic Status,
n_total_pop,total population,,pop,TRCTPOP,SF1,P0010001,SF1,P001001,B01001_001E,total population,
n_total_pop,total population,,pop,TRCTPOP,SF1,P0010001,SF1,P001001,B01003_001E,total population,
10 changes: 9 additions & 1 deletion geosnap/tests/test_harmonize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from numpy.testing import assert_allclose

import os
import quilt3
from geosnap import Community

local_raster = os.path.join(os.getcwd(), "nlcd_2011.tif") # portability
if not os.path.exists(local_raster):
p = quilt3.Package.browse("rasters/nlcd", "s3://spatial-ucr")
p["nlcd_2011.tif"].fetch()


def test_harmonize_area():
la = Community.from_census(county_fips="06037")
Expand All @@ -10,6 +16,7 @@ def test_harmonize_area():
2000,
extensive_variables=["n_total_housing_units"],
intensive_variables=["p_vacant_housing_units"],
raster=local_raster
)

assert_allclose(
Expand Down Expand Up @@ -38,6 +45,7 @@ def test_harmonize_area_weighted():
extensive_variables=["n_total_housing_units"],
intensive_variables=["p_vacant_housing_units"],
weights_method="land_type_area",
raster=local_raster
)
assert harmonized_nlcd_weighted.gdf.n_total_housing_units.sum() == 900620.0
assert_allclose(
Expand Down
Loading