From 0bbae76a202e9a82f62830a52446248825bdfa89 Mon Sep 17 00:00:00 2001
From: CryZFix
Date: Mon, 9 Oct 2023 13:58:48 +0400
Subject: [PATCH 1/4] Fixed source: Free Web Novel

Fix the "No chapters found" error
---
 sources/en/f/freewebnovel.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/sources/en/f/freewebnovel.py b/sources/en/f/freewebnovel.py
index 73a5498a0..3af7159a3 100644
--- a/sources/en/f/freewebnovel.py
+++ b/sources/en/f/freewebnovel.py
@@ -80,19 +80,9 @@ def parse_authors(self, soup: BeautifulSoup):
             yield a.text.strip()
 
     def select_chapter_tags(self, soup: BeautifulSoup):
-        pages = soup.select("#indexselect > option")
-
-        futures: List[Future] = []
-        for page in pages:
-            url = self.absolute_url(page["value"])
-            f = self.executor.submit(self.get_soup, url)
-            futures.append(f)
-
-        self.resolve_futures(futures, desc="TOC", unit="page")
-        for i, future in enumerate(futures):
-            assert future.done(), f"Failed to get page {i + 1}"
-            soup = future.result()
-            yield from soup.select(".m-newest2 li > a")
+        chapters = soup.select("#idData")
+        for chapter in chapters:
+            yield from chapter.select("li > a")
 
     def parse_chapter_item(self, tag: Tag, id: int) -> Chapter:
         return Chapter(

From 0ba49c03db91c0809b611ca156da54f0ced89fa0 Mon Sep 17 00:00:00 2001
From: CryZFix
Date: Mon, 9 Oct 2023 14:10:23 +0400
Subject: [PATCH 2/4] Remove unused imports

---
 sources/en/f/freewebnovel.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sources/en/f/freewebnovel.py b/sources/en/f/freewebnovel.py
index 3af7159a3..024131731 100644
--- a/sources/en/f/freewebnovel.py
+++ b/sources/en/f/freewebnovel.py
@@ -1,9 +1,6 @@
 # -*- coding: utf-8 -*-
 import unicodedata
 
-from concurrent.futures import Future
-from typing import List
-
 from bs4 import BeautifulSoup, Tag
 
 from lncrawl.models import Chapter, SearchResult

From 3842039eea05dbd567a89bc952cc854fcdd48d41 Mon Sep 17 00:00:00 2001
From: CryZFix
Date: Mon, 9 Oct 2023 14:36:07 +0400
Subject: [PATCH 3/4] Added a mirror of the site

---
 sources/en/f/freewebnovel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sources/en/f/freewebnovel.py b/sources/en/f/freewebnovel.py
index 024131731..f4d62d9f8 100644
--- a/sources/en/f/freewebnovel.py
+++ b/sources/en/f/freewebnovel.py
@@ -9,7 +9,7 @@
 
 
 class FreeWebNovelCrawler(SearchableSoupTemplate, ChapterOnlySoupTemplate):
-    base_url = ["https://freewebnovel.com/", "https://bednovel.com/", "https://innread.com/"]
+    base_url = ["https://freewebnovel.com/", "https://bednovel.com/", "https://innread.com/", "https://innnovel.com/"]
 
     def initialize(self) -> None:
         self.init_executor(ratelimit=2)

From 65e55f41f331b0ad388fd66f44bf018731714690 Mon Sep 17 00:00:00 2001
From: CryZFix
Date: Wed, 11 Oct 2023 13:04:26 +0400
Subject: [PATCH 4/4] Fixed source: "WanderingInn"

Fixed the "No chapters found" error
Added correct volume detection
---
 sources/en/w/wanderinginn.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/sources/en/w/wanderinginn.py b/sources/en/w/wanderinginn.py
index d6ddaf8e7..8602cc7fb 100644
--- a/sources/en/w/wanderinginn.py
+++ b/sources/en/w/wanderinginn.py
@@ -33,21 +33,22 @@ def read_novel_info(self):
 
         # Extract volume-wise chapter entries
         # Stops external links being selected as chapters
-        chapters = soup.select('div.entry-content a[href*="wanderinginn"]')
-
-        for a in chapters:
-            chap_id = len(self.chapters) + 1
-            vol_id = 1 + len(self.chapters) // 100
+        volumes = soup.select("div#table-of-contents div[id*='vol']")
+        for volume in volumes:
+            chapters = volume.select('div#table-of-contents .chapter-entry a[href*="wanderinginn"]')
+            vol_id = 1 + len(self.volumes)
             if len(self.volumes) < vol_id:
                 self.volumes.append({"id": vol_id})
-            self.chapters.append(
-                {
-                    "id": chap_id,
-                    "volume": vol_id,
-                    "url": self.absolute_url(a["href"]),
-                    "title": a.text.strip() or ("Chapter %d" % chap_id),
-                }
-            )
+            for a in chapters:
+                chap_id = len(self.chapters) + 1
+                self.chapters.append(
+                    {
+                        "id": chap_id,
+                        "volume": vol_id,
+                        "url": self.absolute_url(a["href"]),
+                        "title": a.text.strip() or ("Chapter %d" % chap_id),
+                    }
+                )
 
     def download_chapter_body(self, chapter):
         soup = self.get_soup(chapter["url"])
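
As a sanity check on the new selector logic, here is a minimal, self-contained sketch of what PATCH 1/4's select_chapter_tags now does. The HTML below is invented to match the shape the "#idData" selector implies; it was not captured from the live site:

    # Sketch of the patched chapter extraction, run against made-up markup.
    from bs4 import BeautifulSoup

    # Assumed structure: the full chapter list lives in a single "#idData"
    # container instead of the old paginated "#indexselect" pages.
    html = """
    <div id="idData">
      <ul>
        <li><a href="/novel/chapter-1">Chapter 1</a></li>
        <li><a href="/novel/chapter-2">Chapter 2</a></li>
      </ul>
    </div>
    """

    def select_chapter_tags(soup):
        # Same selectors as the patched crawler method.
        for container in soup.select("#idData"):
            yield from container.select("li > a")

    soup = BeautifulSoup(html, "html.parser")
    for a in select_chapter_tags(soup):
        print(a["href"], a.text.strip())
    # /novel/chapter-1 Chapter 1
    # /novel/chapter-2 Chapter 2

Because the chapter list no longer spans paginated "#indexselect" pages, the executor futures are unnecessary, which is what lets PATCH 2/4 delete the Future and List imports.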
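
Similarly, a sketch of PATCH 4/4's volume-aware TOC walk, against hypothetical markup in the shape the selectors imply: one div per volume inside "#table-of-contents", each holding ".chapter-entry" links. The inner selector is shortened here by dropping the "div#table-of-contents" prefix; run on a volume block that already sits inside that container, both forms should match the same anchors:

    from bs4 import BeautifulSoup

    # Hypothetical table-of-contents markup; ids and URLs are made up.
    html = """
    <div id="table-of-contents">
      <div id="vol-1">
        <div class="chapter-entry"><a href="https://wanderinginn.com/c1">1.00</a></div>
        <div class="chapter-entry"><a href="https://wanderinginn.com/c2">1.01</a></div>
      </div>
      <div id="vol-2">
        <div class="chapter-entry"><a href="https://wanderinginn.com/c3">2.00</a></div>
      </div>
    </div>
    """

    soup = BeautifulSoup(html, "html.parser")
    volumes, chapters = [], []

    # Mirrors the patched loop: one volume entry per "div[id*='vol']" block,
    # with chapter ids numbered globally across volumes. (The patch guards
    # the volume append, but with vol_id = 1 + len(volumes) it always fires.)
    for volume in soup.select("div#table-of-contents div[id*='vol']"):
        vol_id = 1 + len(volumes)
        volumes.append({"id": vol_id})
        for a in volume.select('.chapter-entry a[href*="wanderinginn"]'):
            chap_id = len(chapters) + 1
            chapters.append({
                "id": chap_id,
                "volume": vol_id,
                "url": a["href"],
                "title": a.text.strip() or ("Chapter %d" % chap_id),
            })

    print(volumes)               # [{'id': 1}, {'id': 2}]
    print(chapters[0]["title"])  # 1.00

Keying volumes off the "div[id*='vol']" blocks replaces the old heuristic of one volume per 100 chapters, so volume boundaries now follow the site's own grouping.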