Skip to content

Commit

Permalink
Merge pull request #205 from knaaptime/datasets
Browse files Browse the repository at this point in the history
Datasets
  • Loading branch information
knaaptime authored Mar 11, 2020
2 parents 5e22219 + 0e3481b commit 608c9ad
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 139 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ install:

script:
- if [[ $TRAVIS_JOB_NAME == python-* ]]; then
python -c "from tobler.data import store_rasters; store_rasters()";
python -c "import os;from geosnap import io;path = os.environ['DLPATH'];io.store_ltdb(sample=path + '/ltdb_sample.zip', fullcount=path + '/ltdb_full.zip')";
travis_wait 45 pytest --ignore=examples/ --ignore=geosnap/util/ --ignore=geosnap/visualize/ --cov geosnap;
jupyter nbconvert --to notebook --execute --inplace --ExecutePreprocessor.timeout=-1 --ExecutePreprocessor.kernel_name=python3 examples/*.ipynb;
Expand Down
8 changes: 4 additions & 4 deletions docs/_modules/geosnap/_data.html
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,10 @@ <h1>Source code for geosnap._data</h1><div class="highlight"><pre>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">tracts_cartographic</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>
<span class="n">administrative</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>

<span class="k">except</span> <span class="n">Timeout</span><span class="p">:</span>
Expand Down Expand Up @@ -304,7 +304,7 @@ <h1>Source code for geosnap._data</h1><div class="highlight"><pre>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">blocks_2000</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>

<span class="k">except</span> <span class="n">Timeout</span><span class="p">:</span>
Expand Down Expand Up @@ -359,7 +359,7 @@ <h1>Source code for geosnap._data</h1><div class="highlight"><pre>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">blocks_2010</span> <span class="o">=</span> <span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">browse</span><span class="p">(</span>
<span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span>
<span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span>
<span class="p">)</span>

<span class="k">except</span> <span class="n">Timeout</span><span class="p">:</span>
Expand Down
8 changes: 4 additions & 4 deletions docs/_modules/geosnap/io/storage.html
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ <h1>Source code for geosnap.io.storage</h1><div class="highlight"><pre>
<span class="sd"> is 3.05 GB.</span>

<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span></div>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/tracts_cartographic&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/administrative&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="store_blocks_2000"><a class="viewcode-back" href="../../../generated/geosnap.io.store_blocks_2000.html#geosnap.io.store_blocks_2000">[docs]</a><span class="k">def</span> <span class="nf">store_blocks_2000</span><span class="p">():</span>
Expand All @@ -173,7 +173,7 @@ <h1>Source code for geosnap.io.storage</h1><div class="highlight"><pre>
<span class="sd"> in place of streaming data for all census queries.</span>

<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span></div>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2000&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="store_blocks_2010"><a class="viewcode-back" href="../../../generated/geosnap.io.store_blocks_2010.html#geosnap.io.store_blocks_2010">[docs]</a><span class="k">def</span> <span class="nf">store_blocks_2010</span><span class="p">():</span>
Expand All @@ -186,7 +186,7 @@ <h1>Source code for geosnap.io.storage</h1><div class="highlight"><pre>
<span class="sd"> in place of streaming data for all census queries.</span>

<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://quilt-cgs&quot;</span><span class="p">)</span></div>
<span class="n">quilt3</span><span class="o">.</span><span class="n">Package</span><span class="o">.</span><span class="n">install</span><span class="p">(</span><span class="s2">&quot;census/blocks_2010&quot;</span><span class="p">,</span> <span class="s2">&quot;s3://spatial-ucr&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="store_ltdb"><a class="viewcode-back" href="../../../generated/geosnap.io.store_ltdb.html#geosnap.io.store_ltdb">[docs]</a><span class="k">def</span> <span class="nf">store_ltdb</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">fullcount</span><span class="p">):</span>
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- palettable
- pip
- tqdm
- quilt3 ==3.1.8
- quilt3 >=3.1.11
- xlrd
- region >=0.2.0
- tobler
2 changes: 1 addition & 1 deletion examples/01_getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"\n",
"geosnap works with data from anywhere in the world, but comes batteries-included with three decades of national US Census data, including boundaries for metropolitan statistical areas, states, counties, and tracts, and over 100 commonly used demographic and socioeconomic variables at the census-tract level. All of these data are stored as geopandas geodataframes in efficient [apache parquet](https://parquet.apache.org/) files and distributed through [quilt](https://quiltdata.com/). \n",
"\n",
"These data are available when you first import geosnap by streaming from our [quilt bucket](https://spatialucr.quiltdata.com/b/quilt-cgs) into memory. That can be useful if you don't need US data or if you just want to kick the tires, but it also means you need an internet connection to work with census data, and things may slow down depending on your network performance. For that reason, you can also use the `store_census` function to cache the data on your local machine for faster querying. This will only take around 400 MB of disk space, speed up data operations, and remove the need for an internet connection."
"These data are available when you first import geosnap by streaming from our [quilt bucket](https://spatialucr.quiltdata.com/b/spatial-ucr) into memory. That can be useful if you don't need US data or if you just want to kick the tires, but it also means you need an internet connection to work with census data, and things may slow down depending on your network performance. For that reason, you can also use the `store_census` function to cache the data on your local machine for faster querying. This will only take around 400 MB of disk space, speed up data operations, and remove the need for an internet connection."
]
},
{
Expand Down
8 changes: 4 additions & 4 deletions geosnap/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ def __init__(self):
)
try:
tracts_cartographic = quilt3.Package.browse(
"census/tracts_cartographic", "s3://quilt-cgs"
"census/tracts_cartographic", "s3://spatial-ucr"
)
administrative = quilt3.Package.browse(
"census/administrative", "s3://quilt-cgs"
"census/administrative", "s3://spatial-ucr"
)

except Timeout:
Expand Down Expand Up @@ -188,7 +188,7 @@ def blocks_2000(self, states=None, convert=True, fips=None):
)
try:
blocks_2000 = quilt3.Package.browse(
"census/blocks_2000", "s3://quilt-cgs"
"census/blocks_2000", "s3://spatial-ucr"
)

except Timeout:
Expand Down Expand Up @@ -243,7 +243,7 @@ def blocks_2010(self, states=None, convert=True, fips=None):
)
try:
blocks_2010 = quilt3.Package.browse(
"census/blocks_2010", "s3://quilt-cgs"
"census/blocks_2010", "s3://spatial-ucr"
)

except Timeout:
Expand Down
14 changes: 7 additions & 7 deletions geosnap/io/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
storage = quilt3.Package()


def store_census():
def store_census(dest=None):
"""Save census data to the local quilt package storage.
Returns
Expand All @@ -43,11 +43,11 @@ def store_census():
is 3.05 GB.
"""
quilt3.Package.install("census/tracts_cartographic", "s3://quilt-cgs")
quilt3.Package.install("census/administrative", "s3://quilt-cgs")
quilt3.Package.install("census/tracts_cartographic", "s3://spatial-ucr", dest=dest)
quilt3.Package.install("census/administrative", "s3://spatial-ucr", dest=dest)


def store_blocks_2000():
def store_blocks_2000(dest=None):
"""Save census 2000 census block data to the local quilt package storage.
Returns
Expand All @@ -57,10 +57,10 @@ def store_blocks_2000():
in place of streaming data for all census queries.
"""
quilt3.Package.install("census/blocks_2000", "s3://quilt-cgs")
quilt3.Package.install("census/blocks_2000", "s3://spatial-ucr", dest=dest)


def store_blocks_2010():
def store_blocks_2010(dest=None):
"""Save census 2010 census block data to the local quilt package storage.
Returns
Expand All @@ -70,7 +70,7 @@ def store_blocks_2010():
in place of streaming data for all census queries.
"""
quilt3.Package.install("census/blocks_2010", "s3://quilt-cgs")
quilt3.Package.install("census/blocks_2010", "s3://spatial-ucr", dest=dest)


def store_ltdb(sample, fullcount):
Expand Down
4 changes: 2 additions & 2 deletions geosnap/io/variables.csv
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ p_asian_over_65,percentage of 65 years and older of Asians and Pacific Islanders
n_female_over_16,"females 16 years and over, except in armed forces",,dflabf,DCFEPR,SF3,P0700006+P0700007+P0700008,SF3,P043012,,Socioeconomic Status,
n_female_labor_force,females in labor force,,flabf,FEPR,SF3,P0700006+P0700007,SF3,P043010,,Socioeconomic Status,
n_labor_force,civilian labor force,,clf,,SF3,P0700002+P0700003+P0700006+P0700007,SF3,P043005+P043012,B27011_002E,Socioeconomic Status,
n_unemployed_persons,unemployed persons,,unemp,,SF3,P0700003+P0700007,SF3,P043007+P043014,C24010_001E-(B23001_007E+B23001_014E+B23001_021E+B23001_028E+B23001_035E+B23001_042E+B23001_049E+B23001_049E+B23001_056E+B23001_063E+B23001_070E+B23001_093E+B23001_100E+B23001_107E+B23001_114E+B23001_121E+B23001_128E+B23001_135E+B23001_142E+B23001_149E+B23001_156E),Socioeconomic Status,
n_unemployed_persons,unemployed persons,,unemp,,SF3,P0700003+P0700007,SF3,P043007+P043014,B23001_008E+B23001_015E+B23001_022E+B23001_029E+B23001_036E+B23001_043E+B23001_050E+B23001_057E+B23001_064E+B23001_071E+B23001_094E+B23001_101E+B23001_108E+B23001_115E+B23001_122E+B23001_129E+B23001_136E+B23001_143E+B23001_150E+B23001_157E,Socioeconomic Status,
n_employed_over_16,employed persons 16 years and over,,empclf,EMPMT,SF3,P0700002+P0700006,SF3,P049001,B23001_007E+B23001_014E+B23001_021E+B23001_028E+B23001_035E+B23001_042E+B23001_049E+B23001_049E+B23001_056E+B23001_063E+B23001_070E+B23001_093E+B23001_100E+B23001_107E+B23001_114E+B23001_121E+B23001_128E+B23001_135E+B23001_142E+B23001_149E+B23001_156E,Socioeconomic Status,
n_employed_professional,professional employees (by occupations),,prof,DLFRAT,SF3,P0780001+P0780002,SF3,P049017+P049044,,Socioeconomic Status,
n_employed_manufacturing,manufacturing employees (by industries),,manuf,PRFEMP,SF3,P0770004+P0770005,SF3,P049007+P049034,,Socioeconomic Status,
Expand Down Expand Up @@ -192,4 +192,4 @@ p_poverty_rate_black,percentage of blacks in poverty,p_poverty_rate_black=n_pove
p_poverty_rate_hispanic,percentage of Hispanics in poverty,p_poverty_rate_hispanic=n_poverty_hispanic / n_poverty_determined_persons*100,phpov,,,,,,,Socioeconomic Status,
p_poverty_rate_native,percentage of Native Americans in poverty,p_poverty_rate_native=n_poverty_native / n_poverty_determined_persons*100,pnapov,,,,,,,Socioeconomic Status,
p_poverty_rate_asian,percentage of Asian and Pacific Islanders in poverty,p_poverty_rate_asian=n_poverty_asian / n_poverty_determined_persons*100,papov,RASPR,,,,,,Socioeconomic Status,
n_total_pop,total population,,pop,TRCTPOP,SF1,P0010001,SF1,P001001,B01001_001E,total population,
n_total_pop,total population,,pop,TRCTPOP,SF1,P0010001,SF1,P001001,B01003_001E,total population,
10 changes: 9 additions & 1 deletion geosnap/tests/test_harmonize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from numpy.testing import assert_allclose

import os
import quilt3
from geosnap import Community

local_raster = os.path.join(os.getcwd(), "nlcd_2011.tif") # portability
if not os.path.exists(local_raster):
p = quilt3.Package.browse("rasters/nlcd", "s3://spatial-ucr")
p["nlcd_2011.tif"].fetch()


def test_harmonize_area():
la = Community.from_census(county_fips="06037")
Expand All @@ -10,6 +16,7 @@ def test_harmonize_area():
2000,
extensive_variables=["n_total_housing_units"],
intensive_variables=["p_vacant_housing_units"],
raster=local_raster
)

assert_allclose(
Expand Down Expand Up @@ -38,6 +45,7 @@ def test_harmonize_area_weighted():
extensive_variables=["n_total_housing_units"],
intensive_variables=["p_vacant_housing_units"],
weights_method="land_type_area",
raster=local_raster
)
assert harmonized_nlcd_weighted.gdf.n_total_housing_units.sum() == 900620.0
assert_allclose(
Expand Down
Loading

0 comments on commit 608c9ad

Please sign in to comment.