Merge pull request #2161 from CryZFix/dev
Fixed "No chapters found" error in two sources
dipu-bd authored Oct 12, 2023
2 parents 766be31 + 65e55f4 commit dfc37c7
Showing 2 changed files with 18 additions and 30 deletions.
21 changes: 4 additions & 17 deletions sources/en/f/freewebnovel.py
@@ -1,9 +1,6 @@
 # -*- coding: utf-8 -*-
 import unicodedata
 
-from concurrent.futures import Future
-from typing import List
-
 from bs4 import BeautifulSoup, Tag
 
 from lncrawl.models import Chapter, SearchResult
@@ -12,7 +9,7 @@
 class FreeWebNovelCrawler(SearchableSoupTemplate, ChapterOnlySoupTemplate):
-    base_url = ["https://freewebnovel.com/", "https://bednovel.com/", "https://innread.com/"]
+    base_url = ["https://freewebnovel.com/", "https://bednovel.com/", "https://innread.com/", "https://innnovel.com/"]
 
     def initialize(self) -> None:
         self.init_executor(ratelimit=2)
@@ -80,19 +77,9 @@ def parse_authors(self, soup: BeautifulSoup):
             yield a.text.strip()
 
     def select_chapter_tags(self, soup: BeautifulSoup):
-        pages = soup.select("#indexselect > option")
-
-        futures: List[Future] = []
-        for page in pages:
-            url = self.absolute_url(page["value"])
-            f = self.executor.submit(self.get_soup, url)
-            futures.append(f)
-
-        self.resolve_futures(futures, desc="TOC", unit="page")
-        for i, future in enumerate(futures):
-            assert future.done(), f"Failed to get page {i + 1}"
-            soup = future.result()
-            yield from soup.select(".m-newest2 li > a")
+        chapters = soup.select("#idData")
+        for chapter in chapters:
+            yield from chapter.select("li > a")
 
     def parse_chapter_item(self, tag: Tag, id: int) -> Chapter:
         return Chapter(
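The rewritten select_chapter_tags drops the paginated fan-out (executor futures over the #indexselect option pages) and reads every chapter link from a single #idData container, which is also why the now-dead Future and List imports are removed. The commit title suggests the old selectors no longer matched the live markup, so the generator yielded nothing and the crawler reported "No chapters found". A minimal standalone sketch of the new selector logic, run against a hypothetical HTML fragment (the fragment is illustrative, not copied from the site):

```python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup

# Hypothetical TOC fragment shaped like the markup the new selectors target:
# one "#idData" container holding every chapter link as "li > a".
html = """
<ul id="idData">
  <li><a href="/free-novel/chapter-1">Chapter 1</a></li>
  <li><a href="/free-novel/chapter-2">Chapter 2</a></li>
</ul>
"""

soup = BeautifulSoup(html, "html.parser")
for container in soup.select("#idData"):   # same outer selector as the patch
    for a in container.select("li > a"):   # chapter anchors inside it
        print(a["href"], a.text.strip())
```

Because all chapters now come from one page, there is nothing left to rate-limit across TOC pages; the executor is still initialized for chapter downloads, but the TOC path no longer touches it.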
27 changes: 14 additions & 13 deletions sources/en/w/wanderinginn.py
@@ -33,21 +33,22 @@ def read_novel_info(self):

         # Extract volume-wise chapter entries
         # Stops external links being selected as chapters
-        chapters = soup.select('div.entry-content a[href*="wanderinginn"]')
-
-        for a in chapters:
-            chap_id = len(self.chapters) + 1
-            vol_id = 1 + len(self.chapters) // 100
+        volumes = soup.select("div#table-of-contents div[id*='vol']")
+        for volume in volumes:
+            chapters = volume.select('div#table-of-contents .chapter-entry a[href*="wanderinginn"]')
+            vol_id = 1 + len(self.volumes)
             if len(self.volumes) < vol_id:
                 self.volumes.append({"id": vol_id})
-            self.chapters.append(
-                {
-                    "id": chap_id,
-                    "volume": vol_id,
-                    "url": self.absolute_url(a["href"]),
-                    "title": a.text.strip() or ("Chapter %d" % chap_id),
-                }
-            )
+            for a in chapters:
+                chap_id = len(self.chapters) + 1
+                self.chapters.append(
+                    {
+                        "id": chap_id,
+                        "volume": vol_id,
+                        "url": self.absolute_url(a["href"]),
+                        "title": a.text.strip() or ("Chapter %d" % chap_id),
+                    }
+                )
 
     def download_chapter_body(self, chapter):
         soup = self.get_soup(chapter["url"])
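On the Wandering Inn side, the old loop synthesized volumes by cutting a new one every 100 chapters (vol_id = 1 + len(self.chapters) // 100); the new code instead walks the volume containers in the site's own table of contents. A standalone sketch of that grouping against a made-up TOC fragment (IDs and URLs are illustrative, and the inner selector here is simplified by dropping the div#table-of-contents prefix that the patch keeps):

```python
from bs4 import BeautifulSoup

# Made-up table-of-contents fragment in the volume-wise layout the updated
# crawler expects: one div per volume, chapters as .chapter-entry links.
html = """
<div id="table-of-contents">
  <div id="vol-1">
    <p class="chapter-entry"><a href="https://wanderinginn.com/1-00">1.00</a></p>
    <p class="chapter-entry"><a href="https://wanderinginn.com/1-01">1.01</a></p>
  </div>
  <div id="vol-2">
    <p class="chapter-entry"><a href="https://wanderinginn.com/2-00">2.00</a></p>
  </div>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
volumes, chapters = [], []
for volume in soup.select("div#table-of-contents div[id*='vol']"):
    vol_id = 1 + len(volumes)             # volumes numbered in document order
    volumes.append({"id": vol_id})
    for a in volume.select(".chapter-entry a[href*='wanderinginn']"):
        chapters.append(
            {
                "id": len(chapters) + 1,  # chapter ids run across volumes
                "volume": vol_id,
                "url": a["href"],
                "title": a.text.strip(),
            }
        )

print(volumes)                 # [{'id': 1}, {'id': 2}]
print(len(chapters))           # 3
```

The href filter on "wanderinginn" serves the same purpose as before: it keeps external links inside the TOC from being picked up as chapters.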
