-
Notifications
You must be signed in to change notification settings - Fork 2
/
update_index.py
213 lines (186 loc) · 7.17 KB
/
update_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import os
import sys
import logging
import requests
import pycountry
import mysql.connector
from pathlib import Path
from datetime import datetime
def checkIfBatchDone(sql_conn, batch_id):
id = ''
sql_cursor = sql_conn.cursor()
try:
sql_cursor.execute("select BATCH from CITY_INFO where BATCH=%s", (batch_id,))
result = sql_cursor.fetchone()
if result:
id = result[0]
except Exception as e:
logging.error('error checking if this batch is processed already: {}'.format(e))
finally:
sql_cursor.close()
return id
def getIPAndPortAndPeersCountFromCrawlerDB(sql_conn):
ip_info = dict()
sql_cursor = sql_conn.cursor()
try:
sql_cursor.execute("select IP, PORT, CONNECTED from PEER_INFO")
result = sql_cursor.fetchall()
for row in result:
ip = row[0].rstrip('\n')
port = row[1]
connected = row[2]
key = ip + str(port)
ip_info[key] = (ip, port, connected)
except Exception as e:
logging.error('error getting IP, PORT and PEERS_COUNT from PEER_INFO : {}'.format(e))
finally:
sql_cursor.close()
return ip_info
def getLatLngCityFromIP(sql_conn, ip):
lat = ''
lng = ''
city = ''
if not ip:
return lat, lng, city
sql_cursor = sql_conn.cursor()
try:
sql_cursor.execute("select LAT, LNG, CITY from IP_INFO where IP=%s", (ip,))
result = sql_cursor.fetchone()
if result:
lat = result[0]
lng = result[1]
city = result[2].rstrip('\n')
except Exception as e:
logging.error('error getting city from IPTOCITY for IP {}: {}'.format(ip, e))
if not city:
url = "http://ipinfo.io/{}?token=21a1a8a7be196b".format(ip)
response = requests.get(url)
data = response.json()
if 'city' in data.keys():
city = data['city']
latlng = data['loc'].split(',')
lat = latlng[0]
lng = latlng[1]
country = ''
asn = ''
org = ''
isp = ''
if 'country' in data.keys():
country = data['country']
if 'asn' in data.keys():
asn = data['asn']['asn']
org = data['asn']['name']
isp = data['asn']['type']
fc = pycountry.countries.get(alpha_2=country)
if hasattr(fc, 'official_name'):
full_country = fc.official_name
elif hasattr(fc, 'name'):
full_country = fc.name
else:
full_country = country
try:
sql_cursor.execute("insert into IP_INFO (ip, lat, lng, city, country, asn, organisation, isp) values (%s, %s, %s, %s, %s, %s, %s, %s)",
(ip, lat, lng, city, full_country, asn, org, isp))
sql_conn.commit()
logging.info('city info for ip={} got from ipinfo city={}, lat={}, lng={}'.format(ip, city, lat, lng))
except Exception as e:
logging.error('error inserting city into IPTOCITY for IP {}: {}'.format(ip, e))
else:
lat = '0.00'
lng = '0.00'
city = 'NO_CITY'
full_country = 'NO_COUNTRY'
asn = 'NO_ASN'
org = 'NO_ORG'
isp = 'NO_ISP'
sql_cursor.execute(
"insert into IP_INFO (ip, lat, lng, city, country, asn, organisation, isp) values (%s, %s, %s, %s, %s, %s, %s, %s)",
(ip, lat, lng, city, full_country, asn, org, isp))
sql_conn.commit()
logging.error('inserting no_city into IPTOCITY for IP {}'.format(ip))
sql_cursor.close()
return lat, lng, city
def addToCityCount(city_count, city, lat, lng, green_count, orange_count, red_count):
if city in city_count.keys():
la, ln, green, orange, red = city_count[city]
green = green + green_count
orange = orange + orange_count
red = red + red_count
city_count[city] = la, ln, green, orange, red
else:
city_count[city] = lat, lng, green_count, orange_count, red_count
def insertCityCountsToTable(sql_conn, city_count, batch_id):
total_rows_in_this_batch = 0
for city in city_count:
la, ln, green_count, orange_count, red_count = city_count[city]
if not city or city == 'null':
continue
sql_cursor = sql_conn.cursor()
try:
sql_cursor.execute(
'insert into CITY_INFO (BATCH, CITY, LAT, LNG, GREEN_COUNT, ORANGE_COUNT, RED_COUNT) values (%s, %s, %s, %s, %s, %s, %s)',
(batch_id, city, la, ln, green_count, orange_count, red_count),)
total_rows_in_this_batch += 1
sql_conn.commit()
except Exception as e:
logging.error('error inserting into CITYBATCH: {}'.format(e))
finally:
sql_cursor.close()
logging.info('added {} rows in this batch'.format(str(total_rows_in_this_batch)))
def main():
city_counts = dict()
logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
home = str(Path.home())
db_password_file = home + '/bin/dbpwd'
file = open(db_password_file, mode='r')
dbPassword = file.read()
dbPassword = dbPassword.rstrip('\n')
file.close()
today = datetime.now()
batch_id = today.strftime("%Y-%m-%d-%H-%M")
logging.info('Starting update_index for batch id {}'.format(batch_id))
# open the beenodes DB
try:
sql_conn = mysql.connector.connect(
host="localhost",
user="crawler",
password=dbPassword,
database="crawler"
)
except Exception as e:
logging.error('error opening database: {}'.format(e))
sys.exit()
# check if this date is already processed
if checkIfBatchDone(sql_conn, batch_id):
logging.error("this batch is already processed")
sys.exit()
# check if lock is open, then start processing
pid = str(os.getpid())
pidfile = "/tmp/beenodes.pid"
if os.path.isfile(pidfile):
print(" {} already exists, exiting".format(pidfile))
sys.exit()
open(pidfile, "w+").write(pid)
try:
# load the ip info
ip_info = getIPAndPortAndPeersCountFromCrawlerDB(sql_conn)
for key in ip_info:
(ip, port, connected) = ip_info[key]
lat, lng, city = getLatLngCityFromIP(sql_conn, ip)
if city == 'NOCITY':
logging.error('could not proceed with {} as CITY could not be found'.format(ip))
addToCityCount(city_counts, city, 0, 0, 1, 0, 0)
continue
if connected == 0:
addToCityCount(city_counts, city, lat, lng, 0, 1, 0)
if connected == 1:
addToCityCount(city_counts, city, lat, lng, 1, 0, 0)
# Insert the batch data in to CITYBATCH table
insertCityCountsToTable(sql_conn, city_counts, batch_id)
finally:
sql_conn.close()
os.unlink(pidfile)
logging.info('removed lock and exiting')
if __name__ == "__main__":
main()