forked from britishredcrosssociety/covid-19-vulnerability
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprep hospitals accessibility.py
176 lines (138 loc) · 6.95 KB
/
prep hospitals accessibility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import numpy as np
import pandas as pd
import geopandas as gpd
import os
from scipy.spatial import cKDTree
############
#
# Globals & Functions
#
############
data_dir = ".\\data"
GBcrs = {"init":"epsg:27700"}
# OS Open Roads accessed from: https://www.ordnancesurvey.co.uk/opendatadownload/products.html
open_roads_dir = os.path.join(data_dir, "OS Open Roads")
# Hospital Names and Locations: https://data.gov.uk/dataset/f4420d1c-043a-42bc-afbc-4c0f7d3f1620/hospitals
hosp_data_dir = os.path.join(data_dir, "hospital_data")
hospitals_file = "Hospital.csv"
# LSOA 2011 Pop Weighted Centroids: https://geoportal.statistics.gov.uk/datasets/lower-layer-super-output-areas-december-2011-population-weighted-centroids
lsoa_pop_weighted_centroids_file = "Lower_Layer_Super_Output_Areas_December_2011_Population_Weighted_Centroids.csv"
##############
#
# Read and clean data
#
##############
dfHospitals = pd.read_csv(os.path.join(hosp_data_dir, hospitals_file), delimiter="¬")
dfLSOACent = pd.read_csv(os.path.join(data_dir, lsoa_pop_weighted_centroids_file))
# Create geometries from coordinates in the Hospitals and LSOA Centroids data
gdfHospitals = gpd.GeoDataFrame(dfHospitals, geometry=gpd.points_from_xy(dfHospitals.Longitude, dfHospitals.Latitude), crs = "EPSG:4326")
gdfHospitals = gdfHospitals.to_crs(GBcrs)
gdfHospitals.dropna(subset = ['Latitude','Longitude'], inplace = True)
gdfLSOACent = gpd.GeoDataFrame(dfLSOACent, geometry = gpd.points_from_xy(dfLSOACent.X, dfLSOACent.Y), crs = "EPSG:27700")
# Open Roads Data
# commented out because once data files have been combined and saved as a single file this section
# of code doesn't need to be executed
'''
# Combine mutiple OS Road Nodes files into a single geodataframe
road_node_files = [i for i in os.listdir(open_roads_dir) if "RoadNode.shp" in i]
gdfRoadNodes = gpd.GeoDataFrame()
for f in road_node_files:
gdf = gpd.read_file(os.path.join(open_roads_dir, f))
gdf['file'] = f
gdfRoadNodes = pd.concat([gdfRoadNodes, gdf])
gdfRoadNodes = gdfRoadNodes.to_crs(GBcrs)
gdfRoadNodes.to_file(os.path.join(data_dir, "OS Open Road Nodes.shp"))
'''
'''
road_link_files = [i for i in os.listdir(open_roads_dir) if "RoadLink.shp" in i]
gdfRoadLinks = gpd.GeoDataFrame()
for f in road_link_files:
gdf = gpd.read_file(os.path.join(open_roads_dir, f))
gdfRoadLinks = pd.concat([gdfRoadLinks, gdf])
gdfRoadLinks = gdfRoadLinks.to_crs(GBcrs)
gdfRoadLinks.to_file(os.path.join(data_dir,"OS Open Road Links.shp"))
'''
########################
#
# Use road network to find distances to nearest hospitals
#
########################
import networkx as nx
import pandana as pdna
# Extract the road network topology from the gis data
print("Loading network topology")
'''
# commented out because once network topology has been extracted from open road gis data this section doesn't need tp be executed
gdfRoadNodes = gpd.read_file(os.path.join(data_dir, "OS Open Road Nodes.shp"))
gdfRoadLinks = gpd.read_file(os.path.join(data_dir,"OS Open Road Links.shp"))
dfNodes = pd.DataFrame({"identifier":gdfRoadNodes.identifier, "x": gdfRoadNodes.geometry.x, "y":gdfRoadNodes.geometry.y})
dfNodes.set_index("identifier", inplace=True)
dfLinks = gdfRoadLinks.loc[:, ['startNode', 'endNode', 'length']]
dfNodes.to_csv(os.path.join(data_dir, "OS Open Road Nodes.csv"))
dfLinks.to_csv(os.path.join(data_dir, "OS Open Road Links.csv"))
gdfRoadNodes = None
gdfRoadLinks = None
'''
dfNodes = pd.read_csv(os.path.join(data_dir, "OS Open Road Nodes.csv"))
dfLinks = pd.read_csv(os.path.join(data_dir, "OS Open Road Links.csv"))
dfNodes.drop_duplicates(inplace=True)
dfNodes.set_index('identifier', inplace=True)
gdfHospitals.set_index('OrganisationCode', inplace = True)
# Get largest connected component
print("Finding largest connected component")
edges = dfLinks.loc[:,['startNode','endNode','length']].values
G = nx.Graph()
G.add_weighted_edges_from(edges)
largest_connected_component = sorted(nx.connected_components(G), key = len, reverse=True)[0]
# Clean up to save memory
G = None
edges = None
# Create pandana network. It's much faster for nearest point-of-interest analysis
print("Creating Network")
# Filter nodes and edges to just include those in the largest connected componet
dfLinksLCC = dfLinks.loc[(dfLinks['startNode'].isin(largest_connected_component)) & (dfLinks['endNode'].isin(largest_connected_component))]
dfNodesLCC = dfNodes.loc[largest_connected_component]
net=pdna.Network(dfNodesLCC["x"], dfNodesLCC["y"], dfLinksLCC["startNode"], dfLinksLCC["endNode"], dfLinksLCC[["length"]])
# Can then get the nearest 3 hospitals to each LSOA centroid
print("Finding nearest hospitals")
search_distance = 200000
net.set_pois("hospitals", search_distance, 3, gdfHospitals.geometry.x, gdfHospitals.geometry.y)
dfNear = net.nearest_pois(search_distance,"hospitals", num_pois=3, include_poi_ids=True)
# Get just the LSOA centroids and their nearest hospitals
lsoa_nodes = net.get_node_ids(gdfLSOACent.X, gdfLSOACent.Y)
dfNearLSOACentroids = dfNear.loc[lsoa_nodes]
# Include LSOA Codes
gdfLSOACent['lsoa_nodes'] = lsoa_nodes
dfNearLSOACentroids.reset_index(inplace=True)
dfNearLSOACentroids = pd.merge(dfNearLSOACentroids, gdfLSOACent, left_on = 'identifier', right_on = 'lsoa_nodes', how = 'outer', indicator = True)
# Check each LSOA Centroid has nearest 3 hospitals
assert dfNearLSOACentroids['_merge'].value_counts()['left_only'] == 0
assert dfNearLSOACentroids['_merge'].value_counts()['right_only'] == 0
assert dfNearLSOACentroids['poi1'].isnull().any() == False
assert dfNearLSOACentroids['poi2'].isnull().any() == False
assert dfNearLSOACentroids['poi3'].isnull().any() == False
#########################
#
# Clearing up and making output more presentable
#
#########################
# Compute distance to nearest hospital and average distance to nearest 3 hospitals
dfNearLSOACentroids['distance_nearest_hospital'] = dfNearLSOACentroids[1]
dfNearLSOACentroids['mean_distance_nearest_three_hospitals'] = dfNearLSOACentroids.loc[:,[1,2,3]].mean(axis = 1)
dfNearLSOACentroids = dfNearLSOACentroids.reindex(columns = [ 'lsoa11cd','lsoa11nm','lsoa_nodes',
1,2,3,'poi1','poi2','poi3',
'mean_distance_nearest_three_hospitals'])
dfNearLSOACentroids = dfNearLSOACentroids.rename(columns = {1:'distance1',2:'distance2',3:'distance3',
'poi1':'hospital1','poi2':'hospital2','poi3':'hospital3',
'lsoa_nodes':'lsoa_centroid_road_node'})
dfNearLSOACentroids.to_csv(os.path.join(data_dir, "lsoa_nearest_hospitals.csv"), index=False)
####################
#
# Make a map
#
###################
f, ax = plt.subplots(1,1)
legend_info ={'label': "Mean distance to nearest 3 hospitals (m)"}
gdflsoas.plot(column = 'mean_distance_nearest_three_hospitals', ax = ax, legend = True, legend_kwds = legend_info)
plt.axis('off')
f.savefig(os.path.join(data_dir, 'lsoa_hospital_distance_map.png'))