-
Notifications
You must be signed in to change notification settings - Fork 0
/
prodder.py
80 lines (66 loc) · 2.26 KB
/
prodder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import itertools
from datetime import datetime, timedelta
import time
import requests
import requests.exceptions as reqex
from faker import Faker
def gen_fake_header():
    """Build a dict of randomized request headers using Faker.

    Returns:
        dict: three entries, inserted in this order:
            'X-Forwarded-User' -> Faker().user_name()
            'user_agent'       -> Faker().user_agent()
            'X-Forwarded-For'  -> Faker().ipv4()
    """
    generator = Faker()
    # NOTE(review): the key 'user_agent' is used verbatim as a header name by
    # callers; the standard HTTP header is 'User-Agent' - confirm intent.
    return {
        'X-Forwarded-User': generator.user_name(),
        'user_agent': generator.user_agent(),
        'X-Forwarded-For': generator.ipv4(),
    }
def print_fmt(*args):
    """Print the given values as one pipe-separated line.

    The template has exactly six slots: values beyond the sixth are ignored,
    and passing between one and five values raises IndexError from
    str.format. Calling with no arguments prints nothing.
    """
    if not args:
        return
    template = "{}|{}|{}|{}|{}|{}"
    line = template.format(*args)
    print(line)
class EmptyTaskList(Exception):
    """Raised when a prodder is asked to run with no tasks (URLs) to request."""
class ProdderEvents(object):
    """Minimal event registry: map event names to zero-argument callbacks."""

    def __init__(self):
        # event name -> list of callables, kept in registration order
        self.listeners = {}

    def on(self, event, f):
        """Register callable ``f`` to be invoked when ``event`` is triggered.

        Several callables may be registered for the same event; they are
        stored (and later called) in registration order.
        """
        self.listeners.setdefault(event, []).append(f)

    def trigger(self, event):
        """Call every listener registered for ``event``, in registration order.

        Previously the loop returned from inside its first iteration, so only
        the first registered listener ever ran. All listeners are now invoked.

        Returns:
            The return value of the first registered listener (kept so that
            callers relying on the old return value still work), or None when
            no listener is registered for ``event``.

        Any exception raised by a listener propagates to the caller and stops
        the remaining listeners from running.
        """
        # Snapshot the list so a listener that registers another listener for
        # the same event does not extend the dispatch currently in progress.
        registered = tuple(self.listeners.get(event, ()))
        first_result = None
        for position, func in enumerate(registered):
            outcome = func()
            if position == 0:
                first_result = outcome
        return first_result
class Prodder(ProdderEvents):
    """Repeatedly issue GET requests against a list of URLs for a fixed time.

    After each request a 'prod' event (success) or an 'err' event (request
    failure) is triggered, so callers can hook in via ``on(event, f)``.
    """

    def __init__(self, tasks, lifespan=60, high=100, header=None, timeout=10):
        """Set up the prodder.

        Args:
            tasks: sequence of URLs to request; must be non-empty by the time
                ``prod()`` is called.
            lifespan: how long to keep prodding, in seconds. The countdown
                starts here, at construction time, not when ``prod()`` runs.
            high: divisor used to derive the pause between requests
                (``60 / high`` seconds). Zero raises ZeroDivisionError.
            header: dict of request headers. When None (the default) a fresh
                header is generated with ``gen_fake_header()`` for this
                instance, rather than one dict being built at definition time
                and shared by every instance.
            timeout: per-request timeout in seconds passed to
                ``requests.get``; None waits indefinitely.
        """
        super().__init__()
        self.tasks = tasks
        self.start = datetime.now()
        self.lifespan = lifespan
        self.end = self.start + timedelta(seconds=self.lifespan)
        self.high = high
        # Despite the attribute name, this value is used as the number of
        # seconds to sleep before each request.
        self.rpm = 60/self.high
        self.header = gen_fake_header() if header is None else header
        self.timeout = timeout

    def prod(self):
        """Request every task in turn, looping until the lifespan has elapsed.

        For each successful request a pipe-separated line (timestamp, status
        code, URL and the three header fields) is printed and the 'prod'
        event is triggered. A ``requests`` exception is printed and the 'err'
        event is triggered instead; the loop then carries on.

        Raises:
            EmptyTaskList: if there are no tasks to request.
        """
        if not self.tasks:
            raise EmptyTaskList(
                "prodder task list is empty - no sites to crawl.\n"
                "Please initialize prodder with urls.")

        while datetime.now() < self.end:
            for task in self.tasks:
                # Re-check the deadline per task so that a long task list
                # cannot run far past the configured lifespan.
                if datetime.now() >= self.end:
                    break
                time.sleep(self.rpm)
                try:
                    r = requests.get(
                        task, headers=self.header, timeout=self.timeout)
                except reqex.RequestException as ce:
                    # just handle baseclass exceptions for now
                    print(ce)
                    self.trigger('err')
                else:
                    # .get() keeps a caller-supplied header that lacks these
                    # keys from aborting the run with KeyError.
                    print_fmt(
                        datetime.now(),
                        r.status_code,
                        r.request.url,
                        self.header.get('X-Forwarded-User'),
                        self.header.get("user_agent"),
                        self.header.get("X-Forwarded-For"))
                    self.trigger('prod')
if __name__ == "__main__":
    # Script entry point: prod a single demo URL with the default settings.
    demo_urls = ['http://blak.la/ybul']
    Prodder(demo_urls).prod()