-
Notifications
You must be signed in to change notification settings - Fork 69
/
place_ua.py
82 lines (74 loc) · 2.42 KB
/
place_ua.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""Retrieve Census Place->Urbanized Area crosswalk"""
from collections import defaultdict
import unicodecsv as csv
import logging
import requests
import us
import re
from skills_utils.fs import cache_json
# Census relationship file that place_ua() downloads and parses as
# comma-delimited text with one header line.
URL = 'http://www2.census.gov/geo/docs/maps-data/data/rel/ua_place_rel_10.txt'
# Lookup keyed by state FIPS code (place_ua indexes it with the file's state
# column); values are used as the top-level keys of the resulting lookup.
# Presumably FIPS -> postal abbreviation, per us.states.mapping('fips', 'abbr').
ABBR_LOOKUP = us.states.mapping('fips', 'abbr')
# Place-type descriptors that place_ua() strips from the end of a place name.
# Matching is case-sensitive and the first matching entry wins, so order matters.
SUFFIXES = [
    'city',
    'town',
    'village',
    'CDP',
    'zona urbana',
    'comunidad',
    'borough',
    'consolidated government',
    'municipality',
    'unified government',
    'metro government',
    'metropolitan government',
    'urban county',
]
# Substrings on which a suffix-stripped place name is split into several
# individually indexed places. Only the first delimiter found (in this order)
# is used for a given name.
DELIMITERS = ['/', '-', ' City']
@cache_json('place_ua_lookup.json')
def place_ua(city_cleaner):
    """
    Construct a Place->UA Lookup table from Census data

    Downloads the relationship file at URL, skips rows whose place code or
    urban area code is '99999' (counted as "not designated"), and indexes
    every remaining row by state and place name.

    For a place name ending in one of SUFFIXES, the trailing suffix is
    removed and the result is stored under city_cleaner(name). If the
    stripped name contains one of DELIMITERS, each non-empty piece is stored
    under city_cleaner(piece) as well.

    Args:
        city_cleaner: callable that takes a place-name string and returns
            the normalized string to use as the lookup key.

    Returns: dict
        { StateCode: { PlaceName: UA Code } }

    Raises:
        requests.HTTPError: if the download responds with an error status.
    """
    logging.info("Beginning UA lookup")
    lookup = defaultdict(dict)
    download = requests.get(URL)
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    download.raise_for_status()
    # The payload is decoded as latin-1 and re-encoded as utf-8 before being
    # split into lines for the csv reader.
    reader = csv.reader(
        download.content.decode('latin-1').encode('utf-8').splitlines(),
        delimiter=','
    )
    # Matches parenthesized qualifiers, which are dropped from place names.
    parenthetical = re.compile(r'\([^)]*\)')
    not_designated = 0
    total = 0
    # skip header line
    next(reader)
    for row in reader:
        total += 1
        ua = row[0]
        state_fips = row[2]
        place_fips = row[3]
        place_name = row[4]

        if place_fips == '99999' or ua == '99999':
            not_designated += 1
            continue

        state_places = lookup[ABBR_LOOKUP[state_fips]]
        cleaned_place_name = parenthetical.sub('', place_name).rstrip()

        # First suffix (in SUFFIXES order) that the name ends with, if any.
        matched_suffix = next(
            (
                suffix for suffix in SUFFIXES
                if cleaned_place_name.endswith(suffix)
            ),
            None
        )
        if matched_suffix is None:
            # NOTE(review): unlike the branches below, this key is not passed
            # through city_cleaner; confirm whether that is intentional.
            state_places[cleaned_place_name] = ua
            continue

        # Remove only the trailing suffix. str.replace() would delete every
        # occurrence and mangle names that contain the suffix elsewhere,
        # e.g. 'Middletown town' -> 'Middle'.
        stem_length = len(cleaned_place_name) - len(matched_suffix)
        cleaned_place_name = cleaned_place_name[:stem_length].rstrip()

        # Compound names are also indexed piece by piece; only the first
        # delimiter present is used for the split.
        for delimiter in DELIMITERS:
            if delimiter in cleaned_place_name:
                for place in cleaned_place_name.split(delimiter):
                    if place:
                        state_places[city_cleaner(place)] = ua
                break
        state_places[city_cleaner(cleaned_place_name)] = ua

    logging.info(
        'Done extracting urbanized areas and urban clusters. %s total rows, %s not designated, %s found',
        total,
        not_designated,
        total - not_designated
    )
    return lookup