-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyzer.py
210 lines (160 loc) · 6.52 KB
/
analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import re
from clientinfo import _make_request_instances, get_info
#--------------HELP VARIABLES---------------
# Lookup lists used by useragent_long / useragent_short. Each segment of a
# split User-Agent is accepted when it contains at least one entry of the
# matching list as a substring.

# Known application (browser) names for the first segment; most browsers
# identify themselves as Mozilla.
valid_applications = ['Mozilla','Opera', 'Safari']
# Known operating-system markers for the system-information segment.
valid_systems = ['Mac OS', 'Linux', 'Windows', 'X11', 'Android', 'PlayStation']
# Known platform (rendering engine) markers for the platform segment.
valid_platforms = ['AppleWebKit', 'KHTML']
# Known markers for the platform-details segment (e.g. "KHTML, like Gecko").
valid_platform_details = ['Gecko', 'KHTML']
# Known product tokens for the trailing extensions segment.
valid_extensions = ['Gecko', 'Safari', 'Firefox', 'Chrome', 'Mobile', 'Version', 'Presto', 'Waterfox', 'Chromium', 'Edge', 'OPR', 'Edg', 'Yowser', 'YaBrowser']
#--------------HELP FUNCTIONS---------------
def useragent_long(ua_splitted):
    """Validate a five-segment User-Agent that was split on ' (' and ') '.

    Segment layout of ``ua_splitted``:
        [0] application        (e.g. "Mozilla/5.0")
        [1] system information (e.g. "Macintosh; Intel Mac OS X 10_14_6")
        [2] platform           (e.g. "AppleWebKit/537.36")
        [3] platform details   (e.g. "KHTML, like Gecko")
        [4] extensions         (e.g. "Chrome/80.0.3987.132 Safari/537.36")

    A segment is accepted when it contains at least one entry of the
    matching module-level ``valid_*`` list as a substring. The first
    rejected segment is reported with ``print`` and ends the check.

    Args:
        ua_splitted: Sequence of strings, one per User-Agent segment.

    Returns:
        True when all five segments are recognised, otherwise False.

    Raises:
        IndexError: If a segment that has to be checked is missing.
    """
    # (segment index, accepted substrings, message printed on rejection)
    checks = (
        (0, valid_applications, "Not a real APP name: "),
        (1, valid_systems, "Not a real SYSTEM name: "),
        (2, valid_platforms, "Not a real PLATFORM name: "),
        (3, valid_platform_details, "Not a real PLATFORM detail name: "),
        (4, valid_extensions, "Not a real EXTENSION name: "),
    )
    for index, accepted, message in checks:
        segment = ua_splitted[index]
        # Substring membership is used instead of the truthiness of
        # str.find(): find() returns -1 (truthy) on a miss and 0 (falsy) for
        # a match at the start, which would let every platform detail pass.
        if not any(name in segment for name in accepted):
            print(message, segment)
            return False
    # All five checks passed, the user-agent is considered real.
    return True
def useragent_short(ua_splitted):
    """Validate a three-segment User-Agent that was split on ' (' and ') '.

    Segment layout of ``ua_splitted``:
        [0] application        (e.g. "Mozilla/5.0")
        [1] system information (e.g. "Macintosh; Intel Mac OS X 10.13; rv:74.0")
        [2] extensions         (e.g. "Gecko/20100101 Firefox/74.0")

    A segment is accepted when it contains at least one entry of the
    matching module-level ``valid_*`` list as a substring. The first
    rejected segment is reported with ``print`` and ends the check.

    Args:
        ua_splitted: Sequence of strings, one per User-Agent segment.

    Returns:
        True when all three segments are recognised, otherwise False.
    """
    # (segment index, accepted substrings, message printed on rejection)
    rules = (
        (0, valid_applications, "Not a real APP name: "),
        (1, valid_systems, "Not a real SYSTEM name: "),
        (2, valid_extensions, "Not a real EXTENSION name: "),
    )
    for position, known_tokens, complaint in rules:
        recognised = any(
            token in ua_splitted[position] for token in known_tokens
        )
        if not recognised:
            print(complaint, ua_splitted[position])
            return False
    # All three checks passed, the user-agent is considered real.
    return True
def real_useragent(user_agent):
    """Return True when *user_agent* has the shape of a known browser UA.

    Expected User-Agent (UA) layout:
        App/X.X (<system-information>) <platform> (<platform-details>) <extensions>

    The string is split on the separators ' (' and ') ' and the number of
    resulting segments selects the validator:

    * 3 segments -> ``useragent_short``, e.g.
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:74.0) Gecko/20100101 Firefox/74.0'
    * 5 segments -> ``useragent_long``, e.g.
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
    * any other segment count is rejected.

    Args:
        user_agent: The raw User-Agent header value as a string.

    Returns:
        True if the UA passes the matching validator, otherwise False.
    """
    # Raw string so the backslashes reach the regex engine untouched; a
    # non-raw literal with '\ ' and '\(' triggers invalid-escape warnings.
    ua_splitted = re.split(r' \(|\) ', user_agent)
    length = len(ua_splitted)
    if length == 3:
        return useragent_short(ua_splitted)
    if length == 5:
        return useragent_long(ua_splitted)
    # Too short, or a segment count no known browser layout produces.
    return False
#-------------ANALYZE A REQUEST--------------
def analyze_level_one(ip_info):
    """LEVEL 1 detection: score an IP from its first User-Agent only.

    Args:
        ip_info: Mapping with a 'user_agents' sequence; only the first entry
            is inspected.

    Returns:
        4 when ``real_useragent`` rejects the first User-Agent, otherwise 1.
    """
    first_agent = ip_info['user_agents'][0]
    rejected = real_useragent(first_agent) is False
    return 4 if rejected else 1
def analyze_level_two(ip_info):
    """LEVEL 2 detection: score an IP from all of its recorded requests.

    Args:
        ip_info: Mapping with the keys 'user_agents',
            'number_of_user_agents', 'number_of_requests' and
            'avarage_request_rate'.

    Returns:
        An integer from 1 to 4. 4 is returned immediately when any
        User-Agent is rejected by ``real_useragent``; otherwise the score
        starts at 1 and gains one point per exceeded threshold.
    """
    # Thresholds; per the original notes these were taken from the human set.
    max_user_agents = 1.03    # average number of user agents
    max_request_rate = 0.49
    max_ua_ratio = 0.69
    min_requests_for_ratio = 2.71

    user_agents = ip_info['user_agents']
    number_of_ua = ip_info['number_of_user_agents']
    number_of_requests = ip_info['number_of_requests']
    avarage_request_rate = ip_info['avarage_request_rate']

    # Test 1: every User-Agent seen from this IP must be a real one.
    if not all(real_useragent(ua) for ua in user_agents):
        return 4

    value = 1
    if number_of_ua > max_user_agents:
        value += 1
    if avarage_request_rate > max_request_rate:
        value += 1
    # The ratio is computed only after the request-count guard, so an entry
    # with zero requests cannot raise ZeroDivisionError.
    if (number_of_requests > min_requests_for_ratio
            and number_of_ua / number_of_requests > max_ua_ratio):
        value += 1
    return value
#--------------------MAIN--------------------
def analyze(ip_list, table):
    """Score every IP in *ip_list* with LEVEL 1 or LEVEL 2 detection.

    For each IP the stored information is fetched with
    ``get_info(ip, table)``. An IP with exactly one recorded request is
    scored by ``analyze_level_one``; any other IP by ``analyze_level_two``.

    Args:
        ip_list: The IPs to analyze.
        table: Passed through unchanged to ``get_info``.

    Returns:
        A list of tuples pairing each IP with its detection score, in the
        format [(ip1, score1), ..., (ipx, scorex)].
    """
    def _score(ip):
        # Look the IP up and decide between LEVEL 1 and LEVEL 2.
        info = get_info(ip, table)
        if info['number_of_requests'] == 1:
            return analyze_level_one(info)
        return analyze_level_two(info)

    scores = [_score(ip) for ip in ip_list]
    return list(zip(ip_list, scores))
# user_agent_rotator = UserAgent(limit=100)
# list = user_agent_rotator.get_user_agents()#
# for ua in list:
# real = real_useragent(ua['user_agent'])
# if real is False:
# print('False UA: ' + ua['user_agent'] + '\n')
# print(ua['user_agent'] + '\n')