-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathosmnodepbf.py
219 lines (201 loc) · 8.12 KB
/
osmnodepbf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# -*- coding: utf-8 -*-
# vim: ai ts=4 sts=4 et sw=4
"""
Based on parsepbf.py by
Chris Hill <osm@raggedred.net>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
__author__ = 'Praneeth Bodduluri'
__email__ = 'lifeeth[at]gmail.com'
import osmformat_pb2
import fileformat_pb2
from struct import unpack
import zlib
class Parser:
    """Parse an OSM PBF file and collect the nodes carrying requested tags.

    The file is opened on construction and its first blob (the header block)
    is validated; the data blocks are consumed later by parse().

    Attributes:
        filename: path of the PBF file being read.
        fpbf: open binary file handle; release it with close().
        tags: {key: set(values)} of every tag seen while parsing with an
            empty filter.
        nodes: list of matched node dicts; it accumulates across parse()
            calls and is only reset when the parser is re-initialised.
        BlobData: payload of the most recently read blob.

    NOTE(review): string table entries are used exactly as protobuf returns
    them, so the keys/values of a tag filter must compare equal to that type.
    Confirm before running on an interpreter where they are bytes, not str.
    """

    # Required features of the header block that this parser can handle.
    _SUPPORTED_FEATURES = ("OsmSchema-V0.6", "DenseNodes")

    def __init__(self, filename):
        """Open *filename* and validate its header block.

        Raises:
            TypeError: if the header blob cannot be read, or if the file
                requires a feature this parser does not support.
        """
        self.filename = filename
        self.fpbf = open(self.filename, "rb")
        self.tags = {}  # content is {key: set(values)}, ...
        self.nodes = []
        self.blobhead = fileformat_pb2.BlobHeader()
        self.blob = fileformat_pb2.Blob()
        self.hblock = osmformat_pb2.HeaderBlock()
        self.primblock = osmformat_pb2.PrimitiveBlock()
        self.membertype = {0: 'node', 1: 'way', 2: 'relation'}
        try:
            # __init__ must not return a value (Python raises TypeError if it
            # does), so an unreadable header is reported with an explicit
            # TypeError, the same type the constructor surfaced before.
            if not self.readPBFBlobHeader() or not self.readBlob():
                raise TypeError(
                    "%s does not start with a readable PBF header blob"
                    % self.filename)
            # check the contents of the first blob are supported
            self.hblock.ParseFromString(self.BlobData)
            for rf in self.hblock.required_features:
                if rf not in self._SUPPORTED_FEATURES:
                    raise TypeError("not a required feature %s" % rf)
        except Exception:
            # Do not leak the file handle when construction fails.
            self.fpbf.close()
            raise

    def close(self):
        """Close the underlying PBF file handle."""
        self.fpbf.close()

    def readPBFBlobHeader(self):
        """Read the next blob header into self.blobhead.

        Returns:
            False when no further header is available (end of file or a
            non-positive length prefix), True otherwise.
        """
        size = self.readint()
        if size <= 0:
            return False
        # ParseFromString reports malformed input by raising, so its return
        # value is not compared against False.
        self.blobhead.ParseFromString(self.fpbf.read(size))
        return True

    def readBlob(self):
        """Read the blob announced by self.blobhead and store its payload.

        The payload ends up in self.BlobData: inflated from zlib_data when
        the blob declares a raw_size, otherwise taken from the blob's
        uncompressed raw field.

        Returns:
            True once the payload has been stored.
        """
        size = self.blobhead.datasize
        self.blob.ParseFromString(self.fpbf.read(size))
        if self.blob.raw_size > 0:
            # uncompress the raw data
            self.BlobData = zlib.decompress(self.blob.zlib_data)
        else:
            # the data does not need uncompressing
            self.BlobData = self.blob.raw
        return True

    def readNextBlock(self):
        """Read the next header + blob pair and decode it into self.primblock.

        Returns:
            False at end of file or when the blob is not of type 'OSMData',
            True when self.primblock holds a freshly parsed block.
        """
        # read a BlobHeader to get things rolling. It should be 'OSMData'
        if not self.readPBFBlobHeader():
            return False
        if self.blobhead.type != "OSMData":
            # Parenthesised single argument: prints the same text whether
            # print is a statement or a function.
            print("Expected OSMData, found %s" % (self.blobhead.type))
            return False
        # read a Blob to actually get some data
        if not self.readBlob():
            return False
        # extract the primitive block
        self.primblock.ParseFromString(self.BlobData)
        return True

    def readint(self):
        """Read a 4-byte unsigned integer stored in network byte order.

        Returns:
            The integer, or -1 when fewer than four bytes remain (end of
            file, including a truncated length prefix).
        """
        be_int = self.fpbf.read(4)
        if len(be_int) < 4:
            return -1
        return unpack('!L', be_int)[0]

    def parse(self, tag=None, refresh=False):
        """Scan the remaining data blocks for nodes matching *tag*.

        Args:
            tag: {key: value} filter; a value of "*" accepts any value for
                that key. When empty or omitted no node is selected and
                every key/value pair seen is recorded in self.tags instead.
            refresh: when True, reopen the file and reset the parser state
                (including self.nodes and self.tags) before scanning.

        Returns:
            self.nodes, the list of node dicts matched so far.
        """
        if tag is None:
            tag = {}
        if refresh:
            # Release the old handle before __init__ opens a new one.
            self.fpbf.close()
            self.__init__(self.filename)
        while self.readNextBlock():
            for pg in self.primblock.primitivegroup:
                if len(pg.nodes) > 0:
                    self.processNodes(pg.nodes, tag)
                if len(pg.dense.id) > 0:
                    self.processDense(pg.dense, tag)
        return self.nodes

    @staticmethod
    def _tag_matches(tag, key, value):
        """Return True when key=value satisfies the {key: value} filter."""
        if key not in tag:
            return False
        wanted = tag[key]
        # Compare against the value requested for this very key, not against
        # the values requested for other keys.
        return wanted == "*" or value == wanted

    def _record_tag(self, key, value):
        """Remember that *value* was seen for *key* in self.tags."""
        self.tags.setdefault(key, set()).add(value)

    def processNodes(self, nodes, tag=None):
        """Append the plain (non-dense) nodes matching *tag* to self.nodes.

        Matching nodes are stored with the same keys processDense() uses:
        tag, nodeid, lon, lat, user, uid, version, changeset, time.
        With an empty filter nothing is appended and every key/value pair is
        recorded in self.tags instead.
        """
        if tag is None:
            tag = {}
        NANO = 1000000000
        gran = float(self.primblock.granularity)
        latoff = float(self.primblock.lat_offset)
        lonoff = float(self.primblock.lon_offset)
        strings = self.primblock.stringtable.s
        date_gran = self.primblock.date_granularity
        collect = not tag
        for nd in nodes:
            node = {"tag": []}
            found_tag = False
            for ky, vl in zip(nd.keys, nd.vals):
                sky = strings[ky]  # Key
                svl = strings[vl]  # Value
                node["tag"].append({sky: svl})
                if self._tag_matches(tag, sky, svl):
                    found_tag = True
                if collect:
                    self._record_tag(sky, svl)
            if found_tag:
                info = nd.info
                node["nodeid"] = nd.id
                node["lon"] = float(nd.lon * gran + lonoff) / NANO
                node["lat"] = float(nd.lat * gran + latoff) / NANO
                node["user"] = strings[info.user_sid]
                node["uid"] = info.uid
                node["version"] = info.version
                node["changeset"] = info.changeset
                # Floor division keeps the timestamp an integer.
                node["time"] = info.timestamp * date_gran // 1000
                self.nodes.append(node)

    def processDense(self, dense, tag=None):
        """Append the dense nodes matching *tag* to self.nodes.

        Ids, coordinates and most metadata are accumulated as running sums
        of the stored deltas. Metadata arrays that are absent from the block
        yield None for the corresponding node fields. With an empty filter
        nothing is appended and every key/value pair is recorded in
        self.tags instead.
        """
        if tag is None:
            tag = {}
        NANO = 1000000000
        # DenseNode uses a delta system of encoding so everything needs to
        # start at zero
        lastID = 0
        lastLat = 0
        lastLon = 0
        tagloc = 0
        cs = 0
        ts = 0
        uid = 0
        user = 0
        vs = None
        gran = float(self.primblock.granularity)
        latoff = float(self.primblock.lat_offset)
        lonoff = float(self.primblock.lon_offset)
        strings = self.primblock.stringtable.s
        date_gran = self.primblock.date_granularity
        keys_vals = dense.keys_vals
        kv_len = len(keys_vals)
        count = len(dense.id)
        # Each metadata array is only usable when it covers every node.
        info = dense.denseinfo
        has_user = len(info.user_sid) >= count
        has_uid = len(info.uid) >= count
        has_version = len(info.version) >= count
        has_time = len(info.timestamp) >= count
        has_changeset = len(info.changeset) >= count
        collect = not tag
        for i in range(count):
            node = {"tag": []}
            found_tag = False
            lastID += dense.id[i]
            lastLat += dense.lat[i]
            lastLon += dense.lon[i]
            if has_user:
                user += info.user_sid[i]
            if has_uid:
                uid += info.uid[i]
            if has_version:
                vs = info.version[i]  # stored as-is, not as a delta
            if has_time:
                ts += info.timestamp[i]
            if has_changeset:
                cs += info.changeset[i]
            if tagloc < kv_len:  # don't try to read beyond the end
                # Pairs of (key, value) indexes; a 0 ends this node's tags.
                while tagloc + 1 < kv_len and keys_vals[tagloc] != 0:
                    sky = strings[keys_vals[tagloc]]  # Key
                    svl = strings[keys_vals[tagloc + 1]]  # Value
                    tagloc += 2
                    node["tag"].append({sky: svl})
                    if self._tag_matches(tag, sky, svl):
                        found_tag = True
                    if collect:
                        self._record_tag(sky, svl)
                tagloc += 1  # step over the terminator
            if found_tag:
                node["nodeid"] = lastID
                node["lon"] = float(lastLon * gran + lonoff) / NANO
                node["lat"] = float(lastLat * gran + latoff) / NANO
                node["user"] = strings[user] if has_user else None
                node["uid"] = uid if has_uid else None
                node["version"] = vs
                node["changeset"] = cs if has_changeset else None
                # Floor division keeps the timestamp an integer.
                node["time"] = ts * date_gran // 1000 if has_time else None
                self.nodes.append(node)

    def return_tags(self, refresh=False):
        """Return the {key: set(values)} mapping of all tags in the file.

        The file is scanned when no tags have been collected yet, or when
        *refresh* is True, in which case the file is reopened and the parser
        state (including self.nodes) is reset first.
        """
        if not self.tags or refresh:
            self.parse(refresh=refresh)
        return self.tags