-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyse.py
157 lines (120 loc) · 6.71 KB
/
analyse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python3
import lib
import ujson
import bz2
from collections import defaultdict
import requests
class Analyse:
    """Parse the retrieved dataset files and aggregate them per ASN.

    On construction every dataset file is parsed and the organisation name
    of each candidate ASN (and of its direct neighbours) is fetched from the
    CAIDA AS2Org API. The aggregated view can then be written out with
    :meth:`export_data`.

    Attributes:
        asns: candidate ASNs as supplied by the caller (ints or numeric
            strings; they are cast with ``int()`` wherever they are used).
        ixp_info: ``ix_id -> (name, country, city)``.
        as_to_ixp_info: ``asn -> set of (name, country, city)`` IXP tuples.
        as_to_facility_info: ``asn -> set of (name, country, city)``
            facility tuples.
        p2c / c2p / p2p: ``{'v4': ..., 'v6': ...}``, each mapping an ASN to
            the set of its customers / providers / peers.
        customer_cone: ``asn -> list of ASNs`` in its cone, itself excluded.
        as2org: ``asn -> organisation name``.
    """

    # Base URL of the CAIDA AS2Org endpoint; ASNs are appended '_'-joined.
    CAIDA_AS2ORG_URL = 'https://api.data.caida.org/as2org/v1/asns/'
    # Number of ASNs sent per request, so the query URL stays bounded.
    AS2ORG_CHUNK_SIZE = 400
    # Seconds to wait for the API before giving up instead of hanging.
    REQUEST_TIMEOUT = 60
    # Keys of the per-IP-version relationship tables.
    IP_VERSIONS = ('v4', 'v6')

    def __init__(self, ixp_filename, as_to_ixp_filename, as_to_facility_filename, as_to_relationship_v4_filename, as_to_relationship_v6_filename, customer_cone_filename, asns):
        """Store the dataset filenames and immediately parse all of them.

        Args:
            ixp_filename: line-delimited JSON file describing the IXPs.
            as_to_ixp_filename: line-delimited JSON file with the IXP
                membership of each AS.
            as_to_facility_filename: JSON file with the facility membership
                of each AS.
            as_to_relationship_v4_filename: bz2-compressed IPv4 AS
                relationship file.
            as_to_relationship_v6_filename: bz2-compressed IPv6 AS
                relationship file.
            customer_cone_filename: bz2-compressed customer cone file.
            asns: iterable of the candidate ASNs to analyse.
        """
        self.asns = asns
        self.ixp_info = {}
        self.as_to_ixp_info = defaultdict(set)
        self.as_to_facility_info = defaultdict(set)
        self.p2c = {'v4': defaultdict(set), 'v6': defaultdict(set)}
        self.c2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
        self.p2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
        self.customer_cone = {}
        self.as2org = {}

        # assign the filenames of the downloaded dataset files
        self.ixp_filename = ixp_filename
        self.as_to_ixp_filename = as_to_ixp_filename
        self.as_to_facility_filename = as_to_facility_filename
        self.as_to_relationship_v4_filename = as_to_relationship_v4_filename
        self.as_to_relationship_v6_filename = as_to_relationship_v6_filename
        self.customer_cone_filename = customer_cone_filename

        # initiate the parsing process; the IXP list must be loaded before
        # the memberships (which reference it by ix_id), and the
        # relationships before the organisation lookup (which needs the
        # neighbours of every candidate ASN)
        self.import_ixps_info()
        self.import_as_to_ixps_info()
        self.import_as_faciiity_info()
        self.import_as_to_customer_cone()
        self.import_as_relationship(self.as_to_relationship_v4_filename, 'v4')
        self.import_as_relationship(self.as_to_relationship_v6_filename, 'v6')
        self.get_as_to_organisations()

    def export_data(self, output_filename):
        """Write the aggregated record of every candidate ASN to a file.

        Each record holds the ASN, its organisation name, its IXPs and
        facilities, its providers/customers/peers per IP version (as
        ``(asn, orgname)`` pairs sorted by ASN) and its customer cone.
        Datasets with no entry for an ASN yield ``None`` (or an empty list
        for the neighbour fields).

        Args:
            output_filename: path handed to ``lib.export_json``.
        """
        print('Exporting data to: '+output_filename)
        # (field prefix, relationship table) in the order the fields are emitted
        neighbour_tables = (
            ('providers', self.c2p),
            ('customers', self.p2c),
            ('peers', self.p2p),
        )
        data_to_export = defaultdict(dict)
        for asn in self.asns:
            asn = int(asn)
            record = data_to_export[asn]
            record['asn'] = asn
            record['orgname'] = self.as2org.get(asn)
            # NOTE(review): these two are sets of tuples; this relies on
            # lib.export_json being able to serialise sets - confirm.
            record['ixps'] = self.as_to_ixp_info.get(asn, None)
            record['facilities'] = self.as_to_facility_info.get(asn)
            for prefix, table in neighbour_tables:
                for version in self.IP_VERSIONS:
                    record[prefix + '_' + version] = self._with_orgnames(table[version].get(asn, ()))
            record['customer_cone'] = self.customer_cone.get(asn, None)
        lib.export_json(data_to_export, output_filename)
        print('Finished')

    def _with_orgnames(self, neighbours):
        """Return ``(asn, orgname)`` pairs for *neighbours*, sorted by ASN."""
        return [(neighbour, self.as2org.get(neighbour)) for neighbour in sorted(neighbours)]

    def _asns_to_resolve(self):
        """Return the candidate ASNs plus all their neighbours as strings.

        The candidates are cast to ``int`` before the relationship tables
        are consulted, because those tables are keyed by integer ASNs; a
        candidate supplied as a string would otherwise never match. The
        result is sorted numerically so the generated queries are
        deterministic.
        """
        candidates = {int(asn) for asn in self.asns}
        wanted = set(candidates)
        for asn in candidates:
            for table in (self.c2p, self.p2c, self.p2p):
                for version in self.IP_VERSIONS:
                    # .get() avoids inserting empty sets into the defaultdicts
                    wanted.update(table[version].get(asn, ()))
        return [str(asn) for asn in sorted(wanted)]

    def get_as_to_organisations(self):
        """Fetch the organisation name of every relevant ASN into ``as2org``.

        The candidate ASNs and their providers, customers and peers are
        queried against the CAIDA AS2Org API in chunks of
        ``AS2ORG_CHUNK_SIZE`` ASNs per request.

        Raises:
            requests.RequestException: on a timeout, a connection problem
                or an HTTP error status.
        """
        print('Fetching organisation names for each ASN...')
        asns = self._asns_to_resolve()
        for start in range(0, len(asns), self.AS2ORG_CHUNK_SIZE):
            batch = asns[start:start + self.AS2ORG_CHUNK_SIZE]
            response = requests.get(url=self.CAIDA_AS2ORG_URL + '_'.join(batch), timeout=self.REQUEST_TIMEOUT)
            # fail loudly on an HTTP error instead of on a missing 'data' key
            response.raise_for_status()
            for entry in response.json()['data']:
                self.as2org[int(entry['asn'])] = entry['orgName']

    @staticmethod
    def _iter_json_lines(filename):
        """Yield one parsed JSON object per data line of *filename*.

        The first line is always skipped, as is any blank line or line
        starting with ``#``.
        """
        with open(filename, encoding='utf-8') as f:
            # skip the leading header line; tolerate an empty file
            next(f, None)
            for line in f:
                line = line.strip()
                if line and not line.startswith('#'):
                    yield ujson.loads(line)

    @staticmethod
    def _first_if_list(value):
        """Return the first element of a list (``None`` if empty), else *value*."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    def import_ixps_info(self):
        """Parse the total list of the available IXPs into ``ixp_info``.

        ``country`` and ``city`` may be given as a list, in which case only
        the first element is kept.
        """
        for data in self._iter_json_lines(self.ixp_filename):
            country = self._first_if_list(data.get('country'))
            city = self._first_if_list(data.get('city'))
            self.ixp_info[int(data['ix_id'])] = (data['name'], country, city)

    def import_as_to_ixps_info(self):
        """Parse the IXP membership of each AS into ``as_to_ixp_info``.

        Memberships that reference an IXP missing from ``ixp_info`` are
        skipped rather than aborting the whole import.
        """
        for data in self._iter_json_lines(self.as_to_ixp_filename):
            ixp = self.ixp_info.get(int(data['ix_id']))
            if ixp is not None:
                self.as_to_ixp_info[int(data['asn'])].add(ixp)

    def import_as_faciiity_info(self):
        """Parse the facility membership of each AS into ``as_to_facility_info``.

        The misspelled method name is kept for backward compatibility; see
        the ``import_as_facility_info`` alias.
        """
        data = lib.import_json(self.as_to_facility_filename)
        for entry in data['netfac']['data']:
            facility = (entry['name'], entry.get('country'), entry.get('city'))
            self.as_to_facility_info[int(entry['local_asn'])].add(facility)

    # Correctly spelled alias of the method above.
    import_as_facility_info = import_as_faciiity_info

    def import_as_relationship(self, filename, version):
        """Parse a bz2-compressed AS relationship file.

        Each data line is ``<as1>|<as2>|<relationship>``, possibly followed
        by further ``|``-separated fields, which are ignored. Blank lines
        and lines starting with ``#`` are skipped.

        Args:
            filename: path of the bz2-compressed relationship file.
            version: ``'v4'`` or ``'v6'``; selects the tables to fill.
        """
        p2c = self.p2c[version]
        c2p = self.c2p[version]
        p2p = self.p2p[version]
        with bz2.open(filename, 'rt') as file:
            for line in file:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                fields = line.split('|')
                provider = int(fields[0])
                customer = int(fields[1])
                relationship = fields[2]
                # provider-to-customer relations
                if relationship == '-1':
                    p2c[provider].add(customer)
                    c2p[customer].add(provider)
                # peer-to-peer relations (symmetric, so stored both ways)
                elif relationship == '0':
                    p2p[provider].add(customer)
                    p2p[customer].add(provider)

    def import_as_to_customer_cone(self):
        """Parse the bz2-compressed customer cone of each AS.

        Each data line is whitespace separated: the first field is the AS,
        the remaining fields are the ASNs in its cone. The AS itself is
        dropped from its own cone if it is listed there. Blank lines and
        lines starting with ``#`` are skipped.
        """
        with bz2.open(self.customer_cone_filename, 'rt') as file:
            for line in file:
                fields = line.split()
                if not fields or fields[0].startswith('#'):
                    continue
                cone_as = int(fields[0])
                members = [int(asn) for asn in fields[1:]]
                self.customer_cone[cone_as] = [asn for asn in members if asn != cone_as]