-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathanidex.py
117 lines (100 loc) · 3.87 KB
/
anidex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# VERSION: 0.02
# AUTHORS: nindogo (nindogo@gmail.com)
# LICENSING INFORMATION
import re
import time
import threading
try:
# Python 3
from html.parser import HTMLParser
except ImportError:
# Python 2
from HTMLParser import HTMLParser
from helpers import retrieve_url
from novaprinter import prettyPrinter
class anidex(object):
    """Search-engine plugin that scrapes torrent listings from AniDex.

    ``search`` builds the query URL, parses the first result page, then
    fetches the remaining pages (one thread per page) and parses each of
    them. Every parsed torrent row is reported through ``prettyPrinter``.
    """

    url = 'https://anidex.info/'
    name = 'AniDex'
    # Maps a category name to the query-string fragment that selects it;
    # the empty fragment for 'all' applies no category filter.
    supported_categories = {
        'all': '',
        'music': 'id=9,10,11&',
        'games': 'id=12&',
        'anime': 'id=1,2,3&',
        'software': 'id=13&',
        'pictures': 'id=14&',
        'books': 'id=6,7,8&',
    }

    # Offset step between consecutive result pages.
    _PAGE_SIZE = 50
    # Seconds to wait before starting each follow-up page request.
    _PAGE_DELAY = 2

    class anidexParser(HTMLParser):
        """Extract one result dictionary per table row of a listing page.

        The parser watches for ``<tr>`` rows, collects the magnet link,
        description link, name, size, seeders and leechers from the cells
        it recognises by CSS class, and emits the row on ``</tr>``.
        """

        url = 'https://anidex.info'
        TR, TH, TD, A, SPAN = 'tr', 'th', 'td', 'a', 'span'

        def __init__(self):
            # Explicit base-class call (not super()) because the file also
            # supports the Python 2 HTMLParser import.
            HTMLParser.__init__(self)
            # All parsing state lives on the instance: a class-level dict
            # would be shared by every parser, including the ones running
            # concurrently in the pagination threads.
            self.inRow = False
            self.getSize = False
            self.getSeed = False
            self.getLeech = False
            self.this_result = {}

        def _reset_row(self):
            """Forget everything captured for the current row."""
            self.getSize = False
            self.getSeed = False
            self.getLeech = False
            # Rebind rather than clear(): a dict already handed to
            # prettyPrinter must not be emptied behind its back.
            self.this_result = {}

        def handle_starttag(self, tag, attrs):
            """Track row boundaries and capture attribute-borne fields."""
            if tag == self.TR:
                if not self.inRow:
                    self.inRow = True
                    self._reset_row()
                return
            if not self.inRow:
                return
            if tag == self.TH:
                # A header cell marks a header row: stop collecting.
                self.inRow = False
                return

            attributes = dict(attrs)
            css_class = attributes.get('class')

            if tag == self.TD:
                # The size cell shares its class with another cell that
                # carries a title attribute; only the title-less one counts.
                if (css_class == 'text-center td-992'
                        and attributes.get('title') is None):
                    self.getSize = True
                if css_class == 'text-success text-right':
                    self.getSeed = True
                if css_class == 'text-danger text-right':
                    self.getLeech = True
            elif tag == self.A:
                # Anchors without an href must not crash the parser.
                href = attributes.get('href') or ''
                if href.startswith('magnet'):
                    self.this_result['link'] = href
                if css_class == 'torrent':
                    self.this_result['desc_link'] = self.url + href
            elif tag == self.SPAN:
                if css_class == 'span-1440':
                    self.this_result['name'] = attributes.get('title')

        def handle_endtag(self, tag):
            """Emit the collected row when its ``</tr>`` is reached."""
            if tag != self.TR or not self.inRow:
                return
            self.inRow = False
            result = self.this_result
            # Start the next row from a clean slate so no field of this
            # row can leak into the following one.
            self._reset_row()
            # Rows that yielded no magnet link or no name are not torrent
            # entries (or are incomplete); skip them instead of printing
            # a half-filled result.
            if result.get('link') and result.get('name'):
                result['engine_url'] = self.url
                prettyPrinter(result)

        def handle_data(self, data):
            """Store the text of a cell flagged by ``handle_starttag``."""
            if not self.inRow:
                return
            # Thousands separators are dropped from every captured value.
            value = data.strip().replace(',', '')
            if self.getSize:
                self.this_result['size'] = value
                self.getSize = False
            if self.getSeed:
                self.this_result['seeds'] = value
                self.getSeed = False
            if self.getLeech:
                self.this_result['leech'] = value
                self.getLeech = False

    @staticmethod
    def _count_results(webpage):
        """Return the total announced by the "Showing ... torrents" banner.

        Returns 0 when the banner is missing or its number cannot be
        parsed, so that callers simply request no further pages.
        """
        match = re.search(r'Showing[^f]+f(.+?)torrents', webpage)
        if match is None:
            return 0
        try:
            return int(match.group(1).strip().replace(',', ''))
        except ValueError:
            return 0

    def do_search(self, url):
        """Download the page at ``url`` and report every result on it."""
        webpage = retrieve_url(url)
        parser = self.anidexParser()
        parser.feed(webpage)

    def search(self, what, cat='all'):
        """Search for ``what`` within category ``cat`` and print results.

        Results are requested sorted by seeders, descending. The first
        page is parsed in the calling thread; each further page is
        fetched in its own thread, started ``_PAGE_DELAY`` seconds apart.

        Raises:
            KeyError: if ``cat`` is not a key of ``supported_categories``.
        """
        query = str(what).replace(' ', '+')
        search_url = (self.url
                      + '?s=seeders&o=desc&'
                      + self.supported_categories[cat.lower()]
                      + 'q=' + query)

        webpage = retrieve_url(search_url)
        # Parse the first page before looking at the result count so its
        # rows are reported even when the count banner is absent.
        parser = self.anidexParser()
        parser.feed(webpage)
        total_results = self._count_results(webpage)

        workers = []
        for offset in range(self._PAGE_SIZE, total_results, self._PAGE_SIZE):
            page_url = search_url + '&offset=' + str(offset)
            worker = threading.Thread(target=self.do_search, args=(page_url,))
            # Stagger the requests instead of firing them all at once.
            time.sleep(self._PAGE_DELAY)
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()
if __name__ == '__main__':
    # Manual smoke test: run one search across all categories when the
    # module is executed directly.
    anidex().search('DS', 'all')