-
Notifications
You must be signed in to change notification settings - Fork 0
/
compileData.py
executable file
·61 lines (41 loc) · 1.83 KB
/
compileData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import os
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
# Directories holding the raw CSV files for the two datasets; each one is
# passed to collectData() further down to build a single DataFrame.
PATH_2016 = 'data/2016Campaign/raw/'
PATH_POST = 'data/postElection/raw/'
def collectData(folderPath):
    """Read every file in *folderPath* as CSV and stack them into one DataFrame.

    Args:
        folderPath: Directory containing the CSV files. A trailing path
            separator is accepted but not required.

    Returns:
        A DataFrame with the rows of all files, re-indexed from 0. An empty
        DataFrame is returned when the directory contains no files.
    """
    frames = []
    # Sorted so the row order does not depend on the OS directory ordering.
    for name in sorted(os.listdir(folderPath)):
        fullPath = os.path.join(folderPath, name)
        # Sub-directories cannot be parsed as CSV; skip them instead of crashing.
        if not os.path.isfile(fullPath):
            continue
        frames.append(pd.read_csv(fullPath))
    if not frames:
        # pd.concat raises on an empty list, so mirror the old empty result.
        return pd.DataFrame()
    # One concat instead of DataFrame.append per file: append was removed in
    # pandas 2.0 and copied the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)
def concatColumns(df, fName):
    """Add a "City, State" string column named *fName* to *df* in place.

    Args:
        df: DataFrame with 'City' and 'State' columns.
        fName: Name of the column to create (or overwrite).

    Returns:
        None; *df* is modified in place.
    """
    # An f-string such as f"{df['City']}, {df['State']}" formats each whole
    # Series into one single string instead of working row by row, so the
    # columns are joined with the element-wise + operator instead.
    city = df['City']
    state = df['State']
    df[fName] = city + ', ' + state
# Load the raw CSV files of each dataset into its own DataFrame.
data2016 = collectData(PATH_2016)
dataPost = collectData(PATH_POST)
# Add the 'concatAddress' column ("City, State") that geocode_df() reads
# as the query string for the geocoder.
concatColumns(data2016, 'concatAddress')
concatColumns(dataPost, 'concatAddress')
# Nominatim client; `locator` itself is what fix() expects (it calls .geocode).
locator = Nominatim(user_agent="Personal geocoding script")
# Rate-limited wrapper around locator.geocode, configured with a minimum
# delay of one second between calls; this callable is passed to geocode_df().
geocoder = RateLimiter(locator.geocode, min_delay_seconds=1)
def geocode_df(df, geocoderObj):
    """Geocode ``df['concatAddress']`` and store the results on *df* in place.

    Columns written: 'location' (the object returned by *geocoderObj*),
    'point' (the location's point as a tuple), 'finalAddress' (the resolved
    address), and 'latitude', 'longitude', 'altitude' (the three components
    of the point, as floats).

    Rows whose address is missing, or for which the geocoder returns no
    result, get missing values in every output column instead of aborting
    the whole run; such rows can be repaired afterwards one at a time.

    Args:
        df: DataFrame with a 'concatAddress' column.
        geocoderObj: Callable taking an address string and returning an
            object with ``point`` and ``address`` attributes, or None.

    Returns:
        None; *df* is modified in place.
    """
    def _missing(value):
        # None, or a float NaN (NaN is the only value not equal to itself).
        return value is None or (isinstance(value, float) and value != value)

    def _lookup(address):
        # Do not send missing addresses to the geocoder at all.
        return None if _missing(address) else geocoderObj(address)

    df['location'] = df['concatAddress'].apply(_lookup)
    df['point'] = df['location'].apply(
        lambda location: None if _missing(location) else tuple(location.point)
    )
    df['finalAddress'] = df['location'].apply(
        lambda location: None if _missing(location) else location.address
    )

    # Fill the coordinate columns one by one so unresolved rows become NaN
    # and an empty frame still ends up with all three columns.
    points = df['point'].tolist()
    for position, name in enumerate(('latitude', 'longitude', 'altitude')):
        values = [float('nan') if _missing(p) else p[position] for p in points]
        df[name] = pd.Series(values, index=df.index, dtype=float)
# Geocode both datasets in place through the rate-limited geocoder; this adds
# the location, point, finalAddress, latitude, longitude and altitude columns.
geocode_df(data2016, geocoder)
geocode_df(dataPost, geocoder)
#Special cases ----------------------
def agg(dfFile, column):
    """Count the rows of a CSV file per distinct value of *column*.

    Args:
        dfFile: Path (or file-like object) of the CSV file to read.
        column: Name of the column to group by.

    Returns:
        A Series indexed by the distinct values of *column* whose values are
        the number of non-null entries of *column* in each group.
    """
    data = pd.read_csv(dfFile)
    return data.groupby(column)[column].count()
def fix(df, index, geoObj, prefixLen=10, suffix='Maine'):
    """Re-geocode a single row whose automatic lookup needs a corrected query.

    The query is the first *prefixLen* characters of the row's
    'concatAddress' followed by *suffix*. With "City, State" addresses the
    prefix should cover the city plus the ", " separator, e.g. 8 for
    "Bangor, " and 10 for "Portland, ".

    Args:
        df: DataFrame previously filled by geocode_df(); modified in place.
        index: Index label of the row to repair.
        geoObj: Object exposing ``geocode(query)`` (e.g. the Nominatim
            locator), returning an object with ``point`` and ``address``
            attributes, or None when nothing is found.
        prefixLen: Number of leading characters of 'concatAddress' to keep.
        suffix: Text appended to the kept prefix to form the query.

    Raises:
        ValueError: If the geocoder returns no result; the row is left
            untouched in that case.
    """
    query = df.at[index, 'concatAddress'][:prefixLen] + suffix
    data = geoObj.geocode(query)
    if data is None:
        # Fail before writing anything so the row is never half-updated.
        raise ValueError(f"geocoder returned no result for {query!r}")

    point = tuple(data.point)
    # The location is stored as its string form, as before.
    df.at[index, 'location'] = str(data)
    df.at[index, 'point'] = point
    df.at[index, 'finalAddress'] = data.address
    df.at[index, 'latitude'] = point[0]
    df.at[index, 'longitude'] = point[1]
    # Keep altitude in sync with the columns geocode_df() writes.
    if len(point) > 2:
        df.at[index, 'altitude'] = point[2]