-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathuniondht.py
151 lines (130 loc) · 9.78 KB
/
uniondht.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#VERSION: 0.03
#AUTHORS: nindogo
import threading
import logging
import time
import urllib
import codecs
import ssl
import tempfile
# logging.basicConfig(level=logging.INFO,
# format='%(asctime)s:%(levelname)s:%(filename)s:%(threadName)s:%(lineno)d:%(message)s')
# logging.getLogger(__name__)
import re
#qbt
from helpers import retrieve_url
from novaprinter import prettyPrinter
class unionDHTParser(threading.Thread):
page_link = threading.local()
# page_content = threading.local()
# the_page_results = threading.local()
each_result = threading.local()
b = threading.local()
SEE_ALL = re.compile(r'tLink"\s+?href="(.+?)"><b>(.*?)<\/b>.+?tr-dl" href="(.+?)">(.+?)<\/a>.+?seedmed bold">(\d)<\/td>.+?leechmed" title="Личеров"><b>(\d)',re.S)
def __init__(self, url):
logging.info('Initiating Parser')
self.page_link = url
threading.Thread.__init__(self)
logging.info('Parser Initiation complete')
def get_page_data(self, url):
req = urllib.request.Request(url)
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686; rv:38.0) Gecko/20100101 Firefox/38.0')
with urllib.request.urlopen(req) as response:
html = response.read()
new_html = codecs.decode(html, encoding='cp1251')
return(new_html)
def run(self):
logging.info('Starting unionDHTParser Thread')
logging.debug('Current link is {}'.format(self.page_link))
page_content = self.get_page_data(self.page_link)
logging.debug('Thread data: {}'.format(page_content))
the_page_results = re.findall(self.SEE_ALL, page_content)
logging.debug(the_page_results)
for self.each_result in the_page_results:
logging.debug('Preparing to PrettyPrint {}'.format(self.each_result[1]))
self.b = dict()
self.b['desc_link'] = 'http://uniondht.org' + self.each_result[0]
self.b['name'] = self.each_result[1].replace('<wbr>','')
self.b['link'] = '' #'http://uniondht.org' + self.each_result[2]
self.b['size'] = self.each_result[3].replace(' ', '').replace(' ', ' ')
self.b['seeds'] = self.each_result[4]
self.b['leech'] = self.each_result[5]
self.b['engine_url'] = 'http://uniondht.org'
logging.debug('ready to prettyPrint record: {}'.format(self.b['name']))
prettyPrinter(self.b)
self.b.clear()
def get_page_data(url):
req = urllib.request.Request(url)
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686; rv:38.0) Gecko/20100101 Firefox/38.0')
ctx = ssl.SSLContext()
# ctx.verify_mode = ssl.CERT_NONE
with urllib.request.urlopen(req,context=ctx) as response:
new_html = response.read().decode('cp1251')
# print((new_html))
# print(type(html))
# new_html = codecs.decode(html, encoding='cp1251')
# new_html = html.decode('cp1251')
return(new_html)
def get_page_data_encoded(url):
req = urllib.request.Request(url)
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686; rv:38.0) Gecko/20100101 Firefox/38.0')
ctx = ssl.SSLContext()
# ctx.verify_mode = ssl.CERT_NONE
with urllib.request.urlopen(req,context=ctx) as response:
new_html = response.read()
# print(type(html))
# new_html = codecs.decode(html, encoding='cp1251')
# new_html = html.decode('cp1251')
return(new_html)
class uniondht(object):
logging.info('Class Starting')
supported_categories = {'all': ''
, 'books': '&f%5B%5D=810&f%5B%5D=812&f%5B%5D=813&f%5B%5D=818&f%5B%5D=826&f%5B%5D=910&f%5B%5D=817&f%5B%5D=816&f%5B%5D=815&f%5B%5D=814&f%5B%5D=811&f%5B%5D=825&f%5B%5D=824&f%5B%5D=823&f%5B%5D=822&f%5B%5D=820&f%5B%5D=819'
, 'anime': '&f%5B%5D=211&f%5B%5D=517&f%5B%5D=518&f%5B%5D=928&f%5B%5D=929&f%5B%5D=930&f%5B%5D=519&f%5B%5D=520&f%5B%5D=212&f%5B%5D=230&f%5B%5D=521&f%5B%5D=214&f%5B%5D=226&f%5B%5D=225'
, 'software': '&f%5B%5D=238&f%5B%5D=371&f%5B%5D=370&f%5B%5D=369&f%5B%5D=368&f%5B%5D=367&f%5B%5D=366&f%5B%5D=365&f%5B%5D=239&f%5B%5D=361&f%5B%5D=360&f%5B%5D=359&f%5B%5D=241&f%5B%5D=349&f%5B%5D=348&f%5B%5D=347&f%5B%5D=346&f%5B%5D=939&f%5B%5D=1132&f%5B%5D=345&f%5B%5D=344&f%5B%5D=242&f%5B%5D=340&f%5B%5D=339&f%5B%5D=338&f%5B%5D=337&f%5B%5D=336&f%5B%5D=335&f%5B%5D=334&f%5B%5D=333&f%5B%5D=332&f%5B%5D=331&f%5B%5D=330&f%5B%5D=329&f%5B%5D=328&f%5B%5D=327&f%5B%5D=326&f%5B%5D=243&f%5B%5D=324&f%5B%5D=323&f%5B%5D=322&f%5B%5D=321&f%5B%5D=320&f%5B%5D=319&f%5B%5D=318&f%5B%5D=317&f%5B%5D=316&f%5B%5D=315&f%5B%5D=314&f%5B%5D=313&f%5B%5D=312&f%5B%5D=311&f%5B%5D=244&f%5B%5D=307&f%5B%5D=306&f%5B%5D=305&f%5B%5D=304&f%5B%5D=303&f%5B%5D=302&f%5B%5D=301&f%5B%5D=245&f%5B%5D=297&f%5B%5D=296&f%5B%5D=295&f%5B%5D=294&f%5B%5D=293&f%5B%5D=292&f%5B%5D=291&f%5B%5D=290&f%5B%5D=289&f%5B%5D=288&f%5B%5D=287&f%5B%5D=246&f%5B%5D=284&f%5B%5D=283&f%5B%5D=282&f%5B%5D=281&f%5B%5D=280&f%5B%5D=1129&f%5B%5D=278&f%5B%5D=277&f%5B%5D=276&f%5B%5D=275&f%5B%5D=274&f%5B%5D=247&f%5B%5D=272&f%5B%5D=271&f%5B%5D=270&f%5B%5D=248&f%5B%5D=1047&f%5B%5D=1048&f%5B%5D=268&f%5B%5D=267&f%5B%5D=266&f%5B%5D=265&f%5B%5D=1046&f%5B%5D=263&f%5B%5D=249&f%5B%5D=715&f%5B%5D=262&f%5B%5D=260&f%5B%5D=259&f%5B%5D=258&f%5B%5D=257&f%5B%5D=250&f%5B%5D=254&f%5B%5D=253&f%5B%5D=251'
, 'movies': '&f%5B%5D=783&f%5B%5D=197&f%5B%5D=1142&f%5B%5D=1131&f%5B%5D=198&f%5B%5D=155&f%5B%5D=784&f%5B%5D=184&f%5B%5D=158&f%5B%5D=924&f%5B%5D=172&f%5B%5D=830&f%5B%5D=1128&f%5B%5D=832&f%5B%5D=833&f%5B%5D=834&f%5B%5D=187&f%5B%5D=156&f%5B%5D=157&f%5B%5D=159&f%5B%5D=160&f%5B%5D=161&f%5B%5D=695&f%5B%5D=1124&f%5B%5D=922&f%5B%5D=931&f%5B%5D=1130&f%5B%5D=737&f%5B%5D=987&f%5B%5D=748&f%5B%5D=747&f%5B%5D=743&f%5B%5D=742&f%5B%5D=741&f%5B%5D=739&f%5B%5D=738'
, 'music': '&f%5B%5D=574&f%5B%5D=595&f%5B%5D=594&f%5B%5D=593&f%5B%5D=592&f%5B%5D=591&f%5B%5D=590&f%5B%5D=575&f%5B%5D=694&f%5B%5D=693&f%5B%5D=691&f%5B%5D=689&f%5B%5D=576&f%5B%5D=688&f%5B%5D=687&f%5B%5D=685&f%5B%5D=683&f%5B%5D=577&f%5B%5D=682&f%5B%5D=681&f%5B%5D=678&f%5B%5D=675&f%5B%5D=578&f%5B%5D=674&f%5B%5D=673&f%5B%5D=671&f%5B%5D=669&f%5B%5D=579&f%5B%5D=668&f%5B%5D=667&f%5B%5D=666&f%5B%5D=664&f%5B%5D=663&f%5B%5D=665&f%5B%5D=662&f%5B%5D=660&f%5B%5D=580&f%5B%5D=659&f%5B%5D=658&f%5B%5D=657&f%5B%5D=656&f%5B%5D=581&f%5B%5D=655&f%5B%5D=654&f%5B%5D=653&f%5B%5D=652&f%5B%5D=651&f%5B%5D=650&f%5B%5D=649&f%5B%5D=648&f%5B%5D=582&f%5B%5D=647&f%5B%5D=646&f%5B%5D=645&f%5B%5D=644&f%5B%5D=643&f%5B%5D=642&f%5B%5D=583&f%5B%5D=641&f%5B%5D=640&f%5B%5D=634&f%5B%5D=628&f%5B%5D=584&f%5B%5D=627&f%5B%5D=626&f%5B%5D=622&f%5B%5D=617&f%5B%5D=585&f%5B%5D=616&f%5B%5D=615&f%5B%5D=614&f%5B%5D=613&f%5B%5D=612&f%5B%5D=611&f%5B%5D=610&f%5B%5D=586&f%5B%5D=609&f%5B%5D=608&f%5B%5D=606&f%5B%5D=604&f%5B%5D=603&f%5B%5D=602&f%5B%5D=601&f%5B%5D=600&f%5B%5D=587&f%5B%5D=599&f%5B%5D=598&f%5B%5D=597&f%5B%5D=596'
, 'games': '&f%5B%5D=34&f%5B%5D=59&f%5B%5D=58&f%5B%5D=57&f%5B%5D=56&f%5B%5D=55&f%5B%5D=54&f%5B%5D=53&f%5B%5D=902&f%5B%5D=35&f%5B%5D=36&f%5B%5D=65&f%5B%5D=64&f%5B%5D=63&f%5B%5D=62&f%5B%5D=37&f%5B%5D=38&f%5B%5D=69&f%5B%5D=68&f%5B%5D=67&f%5B%5D=66&f%5B%5D=39&f%5B%5D=74&f%5B%5D=73&f%5B%5D=72&f%5B%5D=71&f%5B%5D=70&f%5B%5D=40&f%5B%5D=78&f%5B%5D=77&f%5B%5D=76&f%5B%5D=75&f%5B%5D=41&f%5B%5D=43&f%5B%5D=81&f%5B%5D=44&f%5B%5D=45&f%5B%5D=91&f%5B%5D=90&f%5B%5D=89&f%5B%5D=88&f%5B%5D=87&f%5B%5D=86&f%5B%5D=85&f%5B%5D=84&f%5B%5D=83&f%5B%5D=82&f%5B%5D=571&f%5B%5D=46&f%5B%5D=100&f%5B%5D=99&f%5B%5D=98&f%5B%5D=97&f%5B%5D=96&f%5B%5D=95&f%5B%5D=101&f%5B%5D=102&f%5B%5D=103&f%5B%5D=104&f%5B%5D=105&f%5B%5D=106'
}
name = 'UnionDHT'
url = 'http://uniondht.org'
MAX_PAGES_REQUEST = re.compile(r'<p style="float: left">.*? <b>\d</b> .*? <b>(\d+)</b></p>')
SEARCH_TEMPLATE = 'http://uniondht.org/tracker.php?o=10&nm='
AFTER_FINAL_PAGE = re.compile(r'<td class="row1 tCenter pad_8" colspan="12">.*?<\/td>')
GET_TORRENT = re.compile(r'<p><a href="(.+?)" class="for_adblock">')
GET_MAGNET = re.compile(r'href="(magnet:\?xt=urn:btih:.*announce)">?')
# def __init__(self):
# pass
def download_torrent(self, info):
logging.info('Starting download Torrent')
torrent_page = get_page_data(info)
# torrent_list = re.findall(self.GET_TORRENT, torrent_page)
magnet_link = re.findall(self.GET_MAGNET, torrent_page)[0]
print(magnet_link + " " + info)
# torrent_link = str((torrent_list)[0])
# a = get_page_data_encoded(torrent_link)
# with tempfile.NamedTemporaryFile(delete=False) as temp_file:
# temp_file.write(a)
# print(temp_file.name + ' ' + info)
def search(self, what='ncis', cat='all'):
logging.info('Beginning the search for {}'.format(what))
ati_what = what.replace(' ', '+')
page_offset = 0
request_url = self.SEARCH_TEMPLATE + '&nm=' + str(ati_what) + '&start=' + str(page_offset) + self.supported_categories[cat.lower()]
page_url = retrieve_url(request_url)
total_pages = int(re.findall(self.MAX_PAGES_REQUEST, page_url)[0])
while page_offset < (total_pages * 50):
page_link = self.SEARCH_TEMPLATE + '&nm=' + str(ati_what) + '&start=' + str(page_offset) + self.supported_categories[cat.lower()]
a = unionDHTParser(page_link)
a.start()
page_offset += 50
logging.debug('Next query will start on page: {}'.format(page_offset))
while (threading.active_count() > 1):
pass
if __name__ == '__main__':
logging.info('Running script directly')
a = uniondht()
# a.search('ncis', 'music')
# a.download_torrent('http://uniondht.org/topic/41952-ncis-official-soundtrack.html')
a.download_torrent('http://uniondht.org/topic/1551078-ncis-morskaya-politsiya-spetsotdel.html')
pass