Skip to content

Commit

Permalink
feat: add source batoto
Browse files Browse the repository at this point in the history
  • Loading branch information
potatoeggy committed Aug 1, 2024
1 parent 264701b commit 208ddd9
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 0 deletions.
1 change: 1 addition & 0 deletions mandown/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import types

from . import (
source_batoto,
source_blogtruyenmoi,
source_comixextra,
source_kuaikanmanhua,
Expand Down
103 changes: 103 additions & 0 deletions mandown/sources/source_batoto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""
Source file for bato.to
"""

import html
import json
import re
from typing import cast

import requests
from bs4 import BeautifulSoup

from ..base import BaseChapter, BaseMetadata
from .base_source import BaseSource


class BatotoSource(BaseSource):
    """Source implementation for title pages hosted on bato.to.

    The title page HTML is downloaded at most once per instance (see
    ``_get_scripts``) and is shared by ``fetch_metadata`` and
    ``fetch_chapter_list``. Each chapter page is downloaded separately by
    ``fetch_chapter_image_list``.
    """

    name = "Bato.to"
    domains = ["https://bato.to"]

    def __init__(self, url: str) -> None:
        """Normalise *url* to the canonical ``https://bato.to/title/<id>`` form.

        Args:
            url: Any bato.to title or chapter URL accepted by ``check_url``.

        Raises:
            IndexError: If *url* has no path segment after ``/title/``.
        """
        super().__init__(url)
        # url_to_id only splits the string, so nothing in this method's own
        # code touches the network; the title page is fetched lazily later.
        self.id = self.url_to_id(url)
        self.url = f"https://bato.to/title/{self.id}"
        # Cached HTML of the title page; None until the first fetch.
        self._scripts: str | None = None

    def fetch_metadata(self) -> BaseMetadata:
        """Scrape title, authors, genres, description and cover from the title page.

        Returns:
            A ``BaseMetadata`` built from the values found on the page.

        Raises:
            TypeError / AttributeError: If the page layout changed and one of
                the expected elements is missing (``select_one`` returns None).
        """
        soup = BeautifulSoup(self._get_scripts(), "lxml")

        # The cover <img> carries both the title (alt) and the image URL (src).
        cover_art_el = soup.select_one("img.not-prose")
        title = cast(str, cover_art_el["alt"])
        cover_art = cast(str, cover_art_el["src"])

        # De-duplicate while keeping page order. A set would make the order
        # of multiple authors vary between runs (string hash randomisation).
        authors = list(
            dict.fromkeys(
                strip_parentheses(item.text) for item in soup.select("div.mt-2 a")
            )
        )
        genres = [
            strip_parentheses(g.text)
            for g in soup.select(
                "div.space-y-2 > div.flex.items-center.flex-wrap > span > :nth-child(1)"
            )
        ]
        description = soup.select_one(
            "astro-island > div > .prose > .limit-html-p"
        ).text.strip()

        return BaseMetadata(title, authors, self.url, genres, description, cover_art)

    def fetch_chapter_list(self) -> list[BaseChapter]:
        """Return the chapters linked from the title page, in page order.

        Each chapter URL gets ``?load=2`` appended.
        NOTE(review): presumably this makes the site render every page of the
        chapter at once -- confirm against the site's behaviour.
        """
        soup = BeautifulSoup(self._get_scripts(), "lxml")

        chapters: list[BaseChapter] = []
        for item in soup.select('div[name="chapter-list"] div.space-x-1 > a:nth-child(1)'):
            link = cast(str, item["href"])
            title = item.text
            chapters.append(BaseChapter(title, f"https://bato.to{link}?load=2"))
        return chapters

    def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]:
        """Download *chapter*'s page and return its image URLs in reading order.

        Args:
            chapter: A chapter whose ``url`` points at a bato.to chapter page.

        Returns:
            The image URLs listed in the page's ``ImageList`` component.
        """
        soup = BeautifulSoup(requests.get(chapter.url).text, "lxml")

        # The "props" attribute of the ImageList <astro-island> holds
        # HTML-escaped JSON that looks something like:
        # {'pageOpts': [0, {'load': [0, '2'], 'marg': [0, '0'], 'zoom': [0, '0']}], 'imageFiles': [1,
        # '[[0,"https://xfs-n03.xfsbb.com/comic/7006/fbf/65ac7d07a811b44f5e9abfbf/45869815_2560_2824_374174.webp"]]'
        # ], 'urlP': [0, 0]}
        data = json.loads(
            html.unescape(
                soup.select_one('astro-island[component-url^="/_astro/ImageList"]')["props"]
            )
        )

        # imageFiles[1] is itself a JSON string encoding a list of
        # [flag, url] pairs; only the URL (index 1) is needed.
        return [image[1] for image in json.loads(data["imageFiles"][1])]

    def _get_scripts(self) -> str:
        """Return the title page HTML, downloading it on first use.

        An empty response is not cached, so a later call will retry.
        """
        if self._scripts:
            return self._scripts

        # NOTE(review): tests/test_source_batoto.py expects the sequence
        # "\u00e2\u0080\u0094" in the description, which looks like a UTF-8
        # em dash decoded as Latin-1 -- confirm whether ``.text`` is picking
        # the wrong charset here before changing the decoding.
        self._scripts = requests.get(self.url).text or ""
        return self._scripts

    @classmethod
    def url_to_id(cls, url: str) -> str:
        """Extract the title identifier (the segment after ``/title/``) from *url*.

        Raises:
            IndexError: If *url* has fewer than four non-empty ``/``-separated parts.
        """
        items = list(filter(None, url.split("/")))

        # e.g., for https://bato.to/title/115663-my-not-so-fair-lady-is-doomed-but-not-if-i-can-help-it-official/2950514-ch_15
        # we want "115663-my-not-so-fair-lady-is-doomed-but-not-if-i-can-help-it-official"

        return items[3]

    @staticmethod
    def check_url(url: str) -> bool:
        """Return True if *url* is a bato.to title (or chapter) URL.

        The dot in the host is escaped so look-alike hosts such as
        ``batoXto`` are rejected. At least one non-slash character is
        required after ``/title/`` so that every accepted URL has an
        identifier for ``url_to_id`` to return.
        """
        return bool(re.match(r"https://bato\.to/title/+[^/]", url))


def get_class() -> type[BaseSource]:
    """Return the source class defined in this module (``BatotoSource``)."""
    source_cls = BatotoSource
    return source_cls


def strip_parentheses(text: str) -> str:
    """Return *text* cut at its first ``(`` and stripped of outer whitespace.

    When *text* contains no ``(`` the whole string is returned, stripped.
    """
    # partition yields the full string as the head when "(" is absent.
    head, _, _ = text.partition("(")
    return head.strip()
17 changes: 17 additions & 0 deletions tests/test_source_batoto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from common import is_source_working, skip_in_ci

# Title page URL passed to is_source_working in the test below.
URL = "https://bato.to/title/148152-calvin-hobbes"
# Cover image URL the source is expected to report for that title.
COVER_URL = "https://xfs-n02.xfsbb.com/thumb/W600/ampi/338/338d61287365ec7247ce40b729ebf7de865862fc_474_503_48513.jpeg"
# Description text the source is expected to report for that title.
# NOTE(review): the "\u00e2\u0080\u0094" sequence looks like a UTF-8 em dash
# that was decoded as Latin-1 -- confirm whether this mirrors a decoding
# issue in the source rather than the site's real text.
DESCRIPTION = "Calvin and Hobbes follows the humorous antics of the title characters: Calvin, a precocious, mischievous, and adventurous six-year-old boy; and Hobbes, his sardonic stuffed tiger. Set in the suburban United States of the 1980s and 1990s, the strip depicts Calvin's frequent flights of fancy and friendship with Hobbes. It also examines Calvin's relationships with his long-suffering parents and with his classmates, especially his neighbor Susie Derkins. Hobbes's dual nature is a defining motif for the strip: to Calvin, Hobbes is a living anthropomorphic tiger, while all the other characters seem to see Hobbes as an inanimate stuffed toy\u00e2\u0080\u0094though Watterson has not clarified exactly how Hobbes is perceived by others. Though the series does not frequently mention specific political figures or ongoing events, it does explore broad issues like environmentalism, public education, and philosophical quandaries"


@skip_in_ci
def test_calvinhobbes() -> None:
    """Check the Bato.to source against the expected Calvin & Hobbes metadata."""
    # Expected values, forwarded to is_source_working as keyword arguments.
    expected = {
        "title": "Calvin & Hobbes",
        "authors": ["Bill Watterson"],
        "genres": ["Comic", "Kodomo", "Shoujo", "Shounen", "Comedy", "Kids", "Slice of Life"],
        "description": DESCRIPTION,
        "cover_art": COVER_URL,
    }
    return is_source_working(URL, **expected)

0 comments on commit 208ddd9

Please sign in to comment.