-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathhttp.py
307 lines (264 loc) · 10.5 KB
/
http.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
import base64
from typing import Dict, List, Optional, Union
import mmh3
from pydantic import BaseModel
from hhhash import hash_from_banner
from common_osint_model.models import (
ShodanDataHandler,
CensysDataHandler,
BinaryEdgeDataHandler,
Logger,
)
from common_osint_model.utils import hash_all
class HTTPComponentContentFavicon(
    BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger
):
    """Favicon that may accompany an HTTP component.

    ``raw`` holds the favicon as delivered by the data source (base64 text). ``md5``, ``sha1``, ``sha256`` and
    ``murmur`` are the values returned by ``hash_all`` for the base64-decoded bytes, whereas ``shodan_murmur`` is
    the MurmurHash3 of the base64 text itself.
    """

    raw: Optional[str] = None
    md5: Optional[str] = None
    sha1: Optional[str] = None
    sha256: Optional[str] = None
    murmur: Optional[str] = None
    shodan_murmur: Optional[str] = None

    @classmethod
    def from_shodan(cls, d: Dict):
        """Build a favicon model from a Shodan result dictionary.

        Reads the base64 favicon from ``d["http"]["favicon"]["data"]``.

        Raises:
            TypeError: if ``d`` is not a dictionary.
        """
        if not isinstance(d, Dict):
            raise TypeError(
                f"Method HTTPComponentContentFavicon.from_shodan expects parameter d to be a dictionary, "
                f"but it was {type(d)}."
            )

        encoded_icon = d["http"]["favicon"]["data"]
        icon_bytes = base64.b64decode(encoded_icon)
        md5, sha1, sha256, murmur = hash_all(icon_bytes)
        # Hash of the base64 text (not of the decoded bytes), kept separately from ``murmur``.
        encoded_murmur = str(mmh3.hash(encoded_icon))
        cls.info(
            "Shodan's favicon hash only hashes the base64 encoded favicon, not the data itself. The hash can be "
            'found as "shodan_murmur" in this instance. "murmur" and the other hashes are calculated based on '
            "the raw data of the favicon."
        )
        return HTTPComponentContentFavicon(
            raw=encoded_icon,
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            murmur=murmur,
            shodan_murmur=encoded_murmur,
        )

    @classmethod
    def from_censys(cls, d: Dict):
        """Return ``None``; favicons are not parsed from Censys data here.

        TODO: Censys implemented Favicons.
        """
        return None

    @classmethod
    def from_binaryedge(cls, d: Union[Dict, List]):
        """Build a favicon model from a BinaryEdge result.

        Reads the base64 favicon from ``d["result"]["data"]["response"]["favicon"]["content"]``.
        """
        encoded_icon = d["result"]["data"]["response"]["favicon"]["content"]
        encoded_bytes = encoded_icon.encode("utf-8")
        md5, sha1, sha256, murmur = hash_all(base64.b64decode(encoded_bytes))
        return HTTPComponentContentFavicon(
            raw=encoded_icon,
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            murmur=murmur,
            # Same convention as from_shodan: hash of the base64 representation.
            shodan_murmur=str(mmh3.hash(encoded_bytes)),
        )
class HTTPComponentContentRobots(BaseModel, ShodanDataHandler, CensysDataHandler):
    """The robots.txt file found in a webroot, together with the hashes returned by ``hash_all``."""

    raw: Optional[str] = None
    md5: Optional[str] = None
    sha1: Optional[str] = None
    sha256: Optional[str] = None
    murmur: Optional[str] = None

    @classmethod
    def from_shodan(cls, d: Dict):
        """Build a robots.txt model from a Shodan result dictionary.

        Reads the file content from ``d["http"]["robots"]``.

        Raises:
            TypeError: if ``d`` is not a dictionary.
        """
        if not isinstance(d, Dict):
            raise TypeError(
                f"Method HTTPComponentContentRobots.from_shodan expects parameter d to be a dictionary, "
                f"but it was {type(d)}."
            )

        robots_bytes = d["http"]["robots"].encode("utf-8")
        md5, sha1, sha256, murmur = hash_all(robots_bytes)
        # NOTE(review): ``raw`` is declared as str but receives UTF-8 bytes here; this presumably relies on
        # pydantic converting bytes to str - confirm for the pydantic version in use.
        model_fields = {
            "raw": robots_bytes,
            "md5": md5,
            "sha1": sha1,
            "sha256": sha256,
            "murmur": murmur,
        }
        return HTTPComponentContentRobots(**model_fields)

    @classmethod
    def from_censys(cls, d: Dict):
        """Return ``None``; robots.txt is not supported by Censys right now."""
        return None
class HTTPComponentContentSecurity(BaseModel, ShodanDataHandler, CensysDataHandler):
    """Represents the security.txt file in webroots, together with the hashes returned by ``hash_all``."""

    raw: Optional[str] = None
    md5: Optional[str] = None
    sha1: Optional[str] = None
    sha256: Optional[str] = None
    murmur: Optional[str] = None

    @classmethod
    def from_shodan(cls, d: Dict):
        """Creates an instance of this class based on Shodan data given as dictionary.

        Reads the file content from ``d["http"]["securitytxt"]``.

        Raises:
            TypeError: if ``d`` is not a dictionary.
        """
        if not isinstance(d, Dict):
            # The message names this class (it previously named the robots.txt class by mistake).
            raise TypeError(
                f"Method HTTPComponentContentSecurity.from_shodan expects parameter d to be a dictionary, "
                f"but it was {type(d)}."
            )
        raw = d["http"]["securitytxt"].encode("utf-8")
        md5, sha1, sha256, murmur = hash_all(raw)
        # NOTE(review): ``raw`` is declared as str but receives UTF-8 bytes here; this presumably relies on
        # pydantic converting bytes to str - confirm for the pydantic version in use.
        return HTTPComponentContentSecurity(
            raw=raw, md5=md5, sha1=sha1, sha256=sha256, murmur=murmur
        )

    @classmethod
    def from_censys(cls, d: Dict):
        """Not supported by Censys right now; always returns ``None``."""
        return None
class HTTPComponentContent(
    BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger
):
    """Represents the content (body) of HTTP responses.

    Besides the body itself (``raw``), its length and the hashes returned by ``hash_all``, the model can carry
    the favicon, robots.txt and security.txt found for the same service.
    """

    raw: Optional[str] = None
    length: Optional[int] = None
    md5: Optional[str] = None
    sha1: Optional[str] = None
    sha256: Optional[str] = None
    murmur: Optional[str] = None
    favicon: Optional[HTTPComponentContentFavicon] = None
    robots_txt: Optional[HTTPComponentContentRobots] = None
    security_txt: Optional[HTTPComponentContentSecurity] = None

    @classmethod
    def from_shodan(cls, d: Dict):
        """Creates an instance of this class based on Shodan data given as dictionary.

        The favicon, security.txt and robots.txt sub-models are only built when the respective entry in
        ``d["http"]`` is present and non-empty; otherwise they stay ``None``.

        Raises:
            TypeError: if ``d`` is not a dictionary.
            KeyError: if ``d`` has no ``"http"`` entry.
        """
        if not isinstance(d, Dict):
            raise TypeError(
                f"Method HTTPComponentContent.from_shodan expects parameter d to be a dictionary, "
                f"but it was {type(d)}."
            )
        http = d["http"]

        # Use truthy .get() lookups for all optional parts: a missing key must not raise KeyError and a
        # null/empty value must not be handed to the sub-parsers (which index or encode the value).
        favicon = None
        if http.get("favicon"):
            cls.debug("Favicon key found in Shodan data.")
            favicon = HTTPComponentContentFavicon.from_shodan(d)

        security_txt = None
        if http.get("securitytxt"):
            cls.debug("Security.txt key found in Shodan data.")
            security_txt = HTTPComponentContentSecurity.from_shodan(d)

        robots_txt = None
        if http.get("robots"):
            cls.debug("Robots.txt key found in Shodan data.")
            robots_txt = HTTPComponentContentRobots.from_shodan(d)

        # A missing or null HTML body is treated as an empty body.
        raw = http.get("html") or ""
        try:
            raw = raw.encode("utf-8")
        except UnicodeEncodeError as uee:
            # TODO: This is very ugly, but spontaneously I can't find a solution for the weird Shodan encoding issue.
            cls.error(f"UnicodeEncodeError during Shodan result encoding: {uee}")
            cls.warning("Using empty strings as HTML body.")
            raw = b""

        md5, sha1, sha256, murmur = hash_all(raw)
        # ``raw`` is UTF-8 bytes at this point, so ``length`` is the body size in bytes.
        return HTTPComponentContent(
            raw=raw,
            length=len(raw),
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            murmur=murmur,
            favicon=favicon,
            robots_txt=robots_txt,
            security_txt=security_txt,
        )

    @classmethod
    def from_censys(cls, d: Dict):
        """Creates an instance of this class based on Censys (2.0) data given as dictionary.

        The body is read from ``d["http"]["response"]["body"]`` when ``body_size`` is greater than zero and is
        an empty string otherwise. Favicon, robots.txt and security.txt come from the respective ``from_censys``
        parsers, which currently return ``None``.
        """
        http = d["http"]["response"]
        raw = http["body"] if http["body_size"] > 0 else ""
        md5, sha1, sha256, murmur = hash_all(raw.encode("utf-8"))
        # NOTE(review): ``length`` is the length of the un-encoded body here, while from_shodan measures the
        # UTF-8 encoded bytes - the two differ for non-ASCII content.
        return HTTPComponentContent(
            raw=raw,
            length=len(raw),
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            murmur=murmur,
            favicon=HTTPComponentContentFavicon.from_censys(d),
            robots_txt=HTTPComponentContentRobots.from_censys(d),
            security_txt=HTTPComponentContentSecurity.from_censys(d),
        )

    @classmethod
    def from_binaryedge(cls, d: Union[Dict, List]):
        """Creates an instance of this class based on BinaryEdge data given as dictionary. Robots and Security.txt are
        not supported by BinaryEdge.

        The body is read from ``d["result"]["data"]["response"]["body"]["content"]``.
        """
        http_response = d["result"]["data"]["response"]
        raw = http_response["body"]["content"]
        md5, sha1, sha256, murmur = hash_all(raw.encode("utf-8"))
        # NOTE(review): the favicon parser indexes response["favicon"]["content"] unconditionally, so a
        # response without a favicon raises KeyError here - confirm BinaryEdge always provides it.
        return HTTPComponentContent(
            raw=raw,
            length=len(raw),
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            murmur=murmur,
            favicon=HTTPComponentContentFavicon.from_binaryedge(d),
        )
class HTTPComponent(
    BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler
):
    """Represents the HTTP component of services.

    Holds the response headers, the parsed content (body) model and two banner-derived hashes:
    ``shodan_headers_hash`` and ``hhhash`` (the value returned by ``hash_from_banner``).
    """

    headers: Optional[Dict[str, str]] = None
    content: Optional[HTTPComponentContent] = None
    shodan_headers_hash: Optional[str] = None
    hhhash: Optional[str] = None

    @classmethod
    def from_shodan(cls, d: Dict):
        """Creates an instance of this class based on Shodan data given as dictionary.

        Headers are parsed from the banner in ``d["data"]``: every CRLF-separated line containing a colon is
        split at the first colon into a stripped key/value pair.

        Raises:
            TypeError: if ``d`` is not a dictionary.
        """
        if not isinstance(d, Dict):
            raise TypeError(
                f"Method HTTPComponent.from_shodan expects parameter d to be a dictionary, "
                f"but it was {type(d)}."
            )
        content = HTTPComponentContent.from_shodan(d)
        banner = d["data"]

        headers = {}
        for line in banner.split("\r\n"):
            # Lines without a colon (e.g. the status line) are not headers and are skipped.
            key, separator, value = line.partition(":")
            if separator:
                headers[key.strip()] = value.strip()

        headers_hash = d.get("http", {}).get("headers_hash")
        return HTTPComponent(
            headers=headers,
            content=content,
            # Compare against None explicitly: a hash value of 0 is valid and must not be dropped.
            shodan_headers_hash=str(headers_hash) if headers_hash is not None else None,
            hhhash=hash_from_banner(banner),
        )

    @classmethod
    def from_censys(cls, d: Dict):
        """Creates an instance of this class based on Censys data given as dictionary.

        Headers are taken from ``d["http"]["response"]["headers"]``: keys starting with an underscore are
        skipped, underscores in the remaining keys become dashes and the value lists are joined with spaces.
        ``shodan_headers_hash`` is the MurmurHash3 of the first banner line followed by the name of every
        banner line containing a colon, joined by newlines.

        Todo: Is parsing from services.banner better than just looping over the headers found by Censys?
        """
        http = d["http"]["response"]
        headers = {
            name.replace("_", "-"): " ".join(values)
            for name, values in http["headers"].items()
            # startswith() also copes with an empty key, where indexing name[0] would raise IndexError.
            if not name.startswith("_")
        }

        banner = d["banner"]
        banner_lines = banner.replace("\r", "").split("\n")
        header_names = [
            line.split(":", maxsplit=1)[0] for line in banner_lines if ":" in line
        ]
        banner_keys = "\n".join([banner_lines[0], *header_names])
        headers_hash = str(mmh3.hash(banner_keys.encode("utf-8")))

        return HTTPComponent(
            headers=headers,
            content=HTTPComponentContent.from_censys(d),
            shodan_headers_hash=headers_hash,
            hhhash=hash_from_banner(banner),
        )

    @classmethod
    def from_binaryedge(cls, d: Union[Dict, List]):
        """Creates an instance of this class based on BinaryEdge data given as dictionary.

        Headers are taken as-is from ``d["result"]["data"]["response"]["headers"]["headers"]``; neither
        ``shodan_headers_hash`` nor ``hhhash`` is set for BinaryEdge data.
        """
        http_response = d["result"]["data"]["response"]
        headers = http_response["headers"]["headers"]
        return HTTPComponent(
            headers=headers, content=HTTPComponentContent.from_binaryedge(d)
        )