-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
264701b
commit 208ddd9
Showing
3 changed files
with
121 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
""" | ||
Source file for bato.to | ||
""" | ||
|
||
import html | ||
import json | ||
import re | ||
from typing import cast | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
from ..base import BaseChapter, BaseMetadata | ||
from .base_source import BaseSource | ||
|
||
|
||
class BatotoSource(BaseSource):
    """Source implementation for titles hosted on bato.to.

    The HTML of the title page is downloaded at most once per instance and
    reused for both the metadata and the chapter list.
    """

    name = "Bato.to"
    domains = ["https://bato.to"]

    # Seconds to wait for bato.to before giving up. Without an explicit
    # timeout ``requests`` may block indefinitely on an unresponsive server.
    _REQUEST_TIMEOUT = 30

    def __init__(self, url: str) -> None:
        """Reduce *url* (a title or chapter link) to the URL of its title page."""
        super().__init__(url)
        # url_to_id() is pure string manipulation; nothing is fetched here.
        self.id = self.url_to_id(url)
        self.url = f"https://bato.to/title/{self.id}"
        # Lazily filled cache for the title page HTML, see _get_scripts().
        self._scripts: str | None = None

    def fetch_metadata(self) -> BaseMetadata:
        """Scrape title, authors, genres, description and cover from the title page."""
        soup = BeautifulSoup(self._get_scripts(), "lxml")

        # The cover <img> provides both the title (alt) and the cover URL (src).
        cover_art_el = soup.select_one("img.not-prose")
        title = cast(str, cover_art_el["alt"])
        cover_art = cast(str, cover_art_el["src"])

        # dict.fromkeys() removes duplicates while keeping the order in which
        # the names appear on the page; a set would yield an arbitrary order.
        authors = list(
            dict.fromkeys(strip_parentheses(item.text) for item in soup.select("div.mt-2 a"))
        )
        genres = [
            strip_parentheses(genre.text)
            for genre in soup.select(
                "div.space-y-2 > div.flex.items-center.flex-wrap > span > :nth-child(1)"
            )
        ]

        # A page without a description block should not abort the whole fetch.
        description_el = soup.select_one("astro-island > div > .prose > .limit-html-p")
        description = description_el.text.strip() if description_el is not None else ""

        return BaseMetadata(title, authors, self.url, genres, description, cover_art)

    def fetch_chapter_list(self) -> list[BaseChapter]:
        """Return one chapter per link found in the title page's chapter list."""
        soup = BeautifulSoup(self._get_scripts(), "lxml")

        # NOTE(review): "?load=2" presumably selects the reader mode that puts
        # every image of the chapter on a single page -- confirm.
        return [
            BaseChapter(item.text, f"https://bato.to{cast(str, item['href'])}?load=2")
            for item in soup.select('div[name="chapter-list"] div.space-x-1 > a:nth-child(1)')
        ]

    def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
        """Download the chapter page and return the image URLs it embeds."""
        page = requests.get(chapter.url, timeout=self._REQUEST_TIMEOUT).text
        soup = BeautifulSoup(page, "lxml")

        # The "props" attribute holds JSON that looks something like:
        # {'pageOpts': [0, {'load': [0, '2'], 'marg': [0, '0'], 'zoom': [0, '0']}], 'imageFiles': [1,
        # '[[0,"https://xfs-n03.xfsbb.com/comic/7006/fbf/65ac7d07a811b44f5e9abfbf/45869815_2560_2824_374174.webp"]]'
        # ], 'urlP': [0, 0]}
        island = soup.select_one('astro-island[component-url^="/_astro/ImageList"]')
        data = json.loads(html.unescape(cast(str, island["props"])))

        # "imageFiles"[1] is itself a JSON string of [flag, url] pairs.
        return [image[1] for image in json.loads(data["imageFiles"][1])]

    def _get_scripts(self) -> str:
        """Return the title page HTML, fetching it on first use only."""
        # Compare against None so that an empty response is cached as well
        # instead of triggering a new request on every call.
        if self._scripts is None:
            self._scripts = requests.get(self.url, timeout=self._REQUEST_TIMEOUT).text
        return self._scripts

    @classmethod
    def url_to_id(cls, url: str) -> str:
        """Extract the title id (the path segment after ``/title/``) from *url*.

        e.g., for https://bato.to/title/115663-my-not-so-fair-lady-is-doomed-but-not-if-i-can-help-it-official/2950514-ch_15
        we want "115663-my-not-so-fair-lady-is-doomed-but-not-if-i-can-help-it-official"

        Raises:
            IndexError: if *url* has no segment after ``/title/``.
        """
        # Drop a query string or fragment so it cannot end up inside the id.
        path = url.split("#", 1)[0].split("?", 1)[0]
        # Non-empty segments: ["https:", "bato.to", "title", <id>, ...]
        items = [segment for segment in path.split("/") if segment]
        return items[3]

    @staticmethod
    def check_url(url: str) -> bool:
        """Return True if *url* is a bato.to title (or chapter) URL with a title id."""
        # The dot is escaped so that only the literal host matches, and at
        # least one id character is required so url_to_id() cannot fail.
        return re.match(r"https://bato\.to/title/[^/?#]+", url) is not None
|
||
|
||
def get_class() -> type[BaseSource]:
    """Return the source class implemented by this module."""
    source_class = BatotoSource
    return source_class
|
||
|
||
def strip_parentheses(text: str) -> str:
    """Return *text* cut off at its first "(" with surrounding whitespace removed.

    Text without an opening parenthesis is only stripped of whitespace.
    """
    # partition() yields the whole string as the head when "(" is absent.
    head, _, _ = text.partition("(")
    return head.strip()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from common import is_source_working, skip_in_ci | ||
|
||
# Title page used as the fixture for this source, plus the metadata expected from it.
URL = "https://bato.to/title/148152-calvin-hobbes"
COVER_URL = "https://xfs-n02.xfsbb.com/thumb/W600/ampi/338/338d61287365ec7247ce40b729ebf7de865862fc_474_503_48513.jpeg"
# The "\u00e2\u0080\u0094" sequence below is the three UTF-8 bytes of an em dash
# (E2 80 94) written as separate code points, i.e. mojibake.
# NOTE(review): presumably kept so the expectation mirrors what the scraper
# currently returns -- confirm whether it originates from the site itself or
# from response decoding before changing it.
DESCRIPTION = "Calvin and Hobbes follows the humorous antics of the title characters: Calvin, a precocious, mischievous, and adventurous six-year-old boy; and Hobbes, his sardonic stuffed tiger. Set in the suburban United States of the 1980s and 1990s, the strip depicts Calvin's frequent flights of fancy and friendship with Hobbes. It also examines Calvin's relationships with his long-suffering parents and with his classmates, especially his neighbor Susie Derkins. Hobbes's dual nature is a defining motif for the strip: to Calvin, Hobbes is a living anthropomorphic tiger, while all the other characters seem to see Hobbes as an inanimate stuffed toy\u00e2\u0080\u0094though Watterson has not clarified exactly how Hobbes is perceived by others. Though the series does not frequently mention specific political figures or ongoing events, it does explore broad issues like environmentalism, public education, and philosophical quandaries"
|
||
|
||
@skip_in_ci
def test_calvinhobbes() -> None:
    """Check the source against the expected metadata of the "Calvin & Hobbes" title."""
    expected = {
        "title": "Calvin & Hobbes",
        "authors": ["Bill Watterson"],
        "genres": ["Comic", "Kodomo", "Shoujo", "Shounen", "Comedy", "Kids", "Slice of Life"],
        "description": DESCRIPTION,
        "cover_art": COVER_URL,
    }
    return is_source_working(URL, **expected)