Skip to content

Commit

Permalink
chore: hijack requests to fake a user agent
Browse files Browse the repository at this point in the history
  • Loading branch information
potatoeggy committed Oct 16, 2024
1 parent 5a7e5e3 commit 1ff77e6
Show file tree
Hide file tree
Showing 10 changed files with 25 additions and 59 deletions.
13 changes: 11 additions & 2 deletions mandown/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Iterator, Sequence

import filetype
import requests
import requests as RealRequests
from natsort import natsorted

from .base import BaseChapter, BaseMetadata
Expand All @@ -31,7 +31,7 @@ def async_download_image(data: AsyncDownloadImageInput) -> None:
name = filename or url.split("/")[-1]
dest_file = dest_folder / name

res = requests.get(url, headers=headers, timeout=5)
res = RealRequests.get(url, headers=headers, timeout=5)

if res.status_code != 200:
# there is no clean way to raise an error in a pool
Expand Down Expand Up @@ -177,3 +177,12 @@ def discover_local_images(path: Path | str) -> dict[str, list[Path]]:
for chap in sorted(path.iterdir()) # iterdir does not guarantee any order
if chap.is_dir() # force explosion for readability
}


USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36" # noqa: E501


class requests:
    """
    Minimal stand-in for the `requests` module that always sends a
    browser-like User-Agent.

    Source modules import this name instead of the real library so their
    existing `requests.get(...)` calls pick up `USER_AGENT` unchanged.
    """

    @staticmethod
    def get(url: str, **kwargs) -> RealRequests.Response:
        """
        Perform a GET request for `url` with the fake User-Agent applied.

        Any extra keyword arguments are forwarded to the real
        `requests.get`. Caller-supplied `headers` are merged on top of the
        default User-Agent, so a caller can still override it explicitly.
        """
        # Merge instead of replace so a caller passing e.g. a Referer header
        # does not silently lose the User-Agent.
        headers = {"User-Agent": USER_AGENT, **(kwargs.pop("headers", None) or {})}
        # requests waits forever when no timeout is given; default to the
        # same 5 seconds this module uses for image downloads.
        kwargs.setdefault("timeout", 5)
        return RealRequests.get(url, headers=headers, **kwargs)
13 changes: 13 additions & 0 deletions mandown/sources/base_source.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ..base import BaseChapter, BaseMetadata
from ..io import requests


class BaseSource:
Expand All @@ -13,9 +14,21 @@ class BaseSource:
_metadata: BaseMetadata | None = None
_chapters: list[BaseChapter] = []

_scripts: str | None = None

def __init__(self, url: str):
self.url = url

def _get_scripts(self) -> str:
"""
Legacy method for fetching the HTML of `self.url`.
"""
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text or ""
return self._scripts

@property
def metadata(self) -> BaseMetadata:
"""
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_batoto.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
images.append(image[1])
return images

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text or ""
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
items = list(filter(None, url.split("/")))
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_blogtruyenmoi.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
soup = BeautifulSoup(requests.get(chapter.url).text, "lxml")
return [el["src"] for el in soup.select("article#content > img")]

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text or ""
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
items = list(filter(None, url.split("/")))
Expand Down
9 changes: 1 addition & 8 deletions mandown/sources/source_comixextra.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import re

import requests
from bs4 import BeautifulSoup

from ..base import BaseChapter, BaseMetadata
from ..io import requests
from .base_source import BaseSource


Expand Down Expand Up @@ -58,13 +58,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:

return [i["src"] for i in BeautifulSoup(text, "lxml").select(".chapter-container img")]

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text or ""
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
items = list(filter(None, url.split("/")))
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_kuaikanmanhua.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
strings: list[str] = json.loads(constructed)
return [s for s in strings if isinstance(s, str)] # there may be ints

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text or ""
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
items = list(filter(None, url.split("/")))
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_mangakakalot.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
images.append(i["src"])
return images

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
*_, last_item = filter(None, url.split("/"))
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_manganato.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
images.append(i["src"])
return images

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
*_, last_item = filter(None, url.split("/"))
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_mangasee.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
def check_url(url: str) -> bool:
    """Return True if `url` starts with the mangasee123.com manga page prefix."""
    # Dots are escaped so they match only a literal "." in the hostname;
    # unescaped, "mangasee123xcom" would be accepted as well.
    return bool(re.match(r"https://mangasee123\.com/manga/.*", url))

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
# converts page url to id
Expand Down
7 changes: 0 additions & 7 deletions mandown/sources/source_manhuaes.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,6 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
soup = BeautifulSoup(requests.get(chapter.url).text, "lxml")
return [el["data-src"] for el in soup.select("img.wp-manga-chapter-img")]

def _get_scripts(self) -> str:
if self._scripts:
return self._scripts

self._scripts = requests.get(self.url).text or ""
return self._scripts

@classmethod
def url_to_id(cls, url: str) -> str:
items = list(filter(None, url.split("/")))
Expand Down

0 comments on commit 1ff77e6

Please sign in to comment.