Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 403 Forbidden by providing most common user agents #91

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,16 @@
import unittest

from lxml import html
from requests import get

from tpb.tpb import TPB, Search, Recent, Top, List, Paginated
from tpb.constants import ConstantType, Constants, ORDERS, CATEGORIES
from tpb.utils import URL
from tpb.utils import URL, headers

if sys.version_info >= (3, 0):
from urllib.request import urlopen
from tests.cases import RemoteTestCase
unicode = str
else:
from urllib2 import urlopen
from cases import RemoteTestCase


Expand Down Expand Up @@ -106,8 +105,12 @@ def test_creation_dates(self):
self.assertTrue(diff > 1)

def test_torrent_rows(self):
request = urlopen(str(self.torrents.url))
document = html.parse(request)
request = get(
str(self.torrents.url),
headers=headers(),
stream=True
)
document = html.parse(request.raw)
rows = self.torrents._get_torrent_rows(document.getroot())
self.assertEqual(len(rows), 30)

Expand Down
8 changes: 4 additions & 4 deletions tpb/tpb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import sys
import time

from .utils import URL
from .utils import URL, headers

from requests import get

Expand Down Expand Up @@ -56,7 +56,7 @@ def items(self):
Request URL and parse response. Yield a ``Torrent`` for every torrent
on page.
"""
request = get(str(self.url), headers={'User-Agent' : "Magic Browser","origin_req_host" : "thepiratebay.se"})
request = get(str(self.url), headers=headers())
root = html.fromstring(request.text)
items = [self._build_torrent(row) for row in
self._get_torrent_rows(root)]
Expand Down Expand Up @@ -342,7 +342,7 @@ def __init__(self, title, url, category, sub_category, magnet_link,
@property
def info(self):
if self._info is None:
request = get(str(self.url), headers={'User-Agent' : "Magic Browser","origin_req_host" : "thepiratebay.se"})
request = get(str(self.url), headers=headers())
root = html.fromstring(request.text)
info = root.cssselect('#details > .nfo > pre')[0].text_content()
self._info = info
Expand All @@ -353,7 +353,7 @@ def files(self):
if not self._files:
path = '/ajax_details_filelist.php?id={id}'.format(id=self.id)
url = self.url.path(path)
request = get(str(url), headers={'User-Agent' : "Magic Browser","origin_req_host" : "thepiratebay.se"})
request = get(str(url), headers=headers())
root = html.fromstring(request.text)
rows = root.findall('.//tr')
for row in rows:
Expand Down
30 changes: 30 additions & 0 deletions tpb/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
import random

from purl import URL as PURL

Expand Down Expand Up @@ -61,3 +62,32 @@ def _segment(cls, segment):
fget=lambda x: cls._get_segment(x, segment),
fset=lambda x, v: cls._set_segment(x, segment, v),
)


def headers():
    """
    Return the HTTP headers to send along with a request.

    Per the original author's note, The Pirate Bay rejects requests
    (403 Forbidden) based on the User-Agent header, so a User-Agent is
    drawn at random from ``USER_AGENTS`` on every call rather than being
    fixed. The User-Agent strings were taken from:
    https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
    """
    chosen_agent = random.choice(USER_AGENTS)
    request_headers = dict()
    request_headers["User-Agent"] = chosen_agent
    # NOTE(review): "origin_req_host" looks like the urllib Request
    # attribute name rather than a standard HTTP header -- confirm that
    # the server actually needs it.
    request_headers["origin_req_host"] = "thepiratebay.se"
    return request_headers


# Pool of browser User-Agent strings that ``headers()`` picks from at random.
USER_AGENTS = (
    # Windows NT 10.0 (Win64; x64), Chrome 60.0.3112.113
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
    # Windows NT 10.0 (Win64; x64), Chrome 60.0.3112.101
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
    # Windows NT 6.1 (Win64; x64), Chrome 60.0.3112.113
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
    # Macintosh (Intel Mac OS X 10_12_6), Chrome 60.0.3112.113
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
)