Merge pull request #2161 from CryZFix/dev
Fixed "No chapters found" error in two sources
dipu-bd authored Oct 12, 2023
2 parents 766be31 + 65e55f4 commit dfc37c7
Showing 2 changed files with 18 additions and 30 deletions.
21 changes: 4 additions & 17 deletions sources/en/f/freewebnovel.py
@@ -1,9 +1,6 @@
 # -*- coding: utf-8 -*-
 import unicodedata
 
-from concurrent.futures import Future
-from typing import List
-
 from bs4 import BeautifulSoup, Tag
 
 from lncrawl.models import Chapter, SearchResult
@@ -12,7 +9,7 @@
 class FreeWebNovelCrawler(SearchableSoupTemplate, ChapterOnlySoupTemplate):
-    base_url = ["https://freewebnovel.com/", "https://bednovel.com/", "https://innread.com/"]
+    base_url = ["https://freewebnovel.com/", "https://bednovel.com/", "https://innread.com/", "https://innnovel.com/"]
 
     def initialize(self) -> None:
         self.init_executor(ratelimit=2)
@@ -80,19 +77,9 @@ def parse_authors(self, soup: BeautifulSoup):
             yield a.text.strip()
 
     def select_chapter_tags(self, soup: BeautifulSoup):
-        pages = soup.select("#indexselect > option")
-
-        futures: List[Future] = []
-        for page in pages:
-            url = self.absolute_url(page["value"])
-            f = self.executor.submit(self.get_soup, url)
-            futures.append(f)
-
-        self.resolve_futures(futures, desc="TOC", unit="page")
-        for i, future in enumerate(futures):
-            assert future.done(), f"Failed to get page {i + 1}"
-            soup = future.result()
-            yield from soup.select(".m-newest2 li > a")
+        chapters = soup.select("#idData")
+        for chapter in chapters:
+            yield from chapter.select("li > a")
 
     def parse_chapter_item(self, tag: Tag, id: int) -> Chapter:
         return Chapter(
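The rewritten select_chapter_tags drops the paginated fan-out (executor futures over the #indexselect option pages) and reads every chapter link from a single #idData container, which is also why the now-dead Future and List imports are removed. The commit title suggests the old selectors no longer matched the live markup, so the generator yielded nothing and the crawler reported "No chapters found". A minimal standalone sketch of the new selector logic, run against a hypothetical HTML fragment (the fragment is illustrative, not copied from the site):

```python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup

# Hypothetical TOC fragment shaped like the markup the new selectors target:
# one "#idData" container holding every chapter link as "li > a".
html = """
<ul id="idData">
  <li><a href="/free-novel/chapter-1">Chapter 1</a></li>
  <li><a href="/free-novel/chapter-2">Chapter 2</a></li>
</ul>
"""

soup = BeautifulSoup(html, "html.parser")
for container in soup.select("#idData"):   # same outer selector as the patch
    for a in container.select("li > a"):   # chapter anchors inside it
        print(a["href"], a.text.strip())
```

Because all chapters now come from one page, there is nothing left to rate-limit across TOC pages; the executor is still initialized for chapter downloads, but the TOC path no longer touches it.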
27 changes: 14 additions & 13 deletions sources/en/w/wanderinginn.py
@@ -33,21 +33,22 @@ def read_novel_info(self):

         # Extract volume-wise chapter entries
         # Stops external links being selected as chapters
-        chapters = soup.select('div.entry-content a[href*="wanderinginn"]')
-
-        for a in chapters:
-            chap_id = len(self.chapters) + 1
-            vol_id = 1 + len(self.chapters) // 100
+        volumes = soup.select("div#table-of-contents div[id*='vol']")
+        for volume in volumes:
+            chapters = volume.select('div#table-of-contents .chapter-entry a[href*="wanderinginn"]')
+            vol_id = 1 + len(self.volumes)
             if len(self.volumes) < vol_id:
                 self.volumes.append({"id": vol_id})
-            self.chapters.append(
-                {
-                    "id": chap_id,
-                    "volume": vol_id,
-                    "url": self.absolute_url(a["href"]),
-                    "title": a.text.strip() or ("Chapter %d" % chap_id),
-                }
-            )
+            for a in chapters:
+                chap_id = len(self.chapters) + 1
+                self.chapters.append(
+                    {
+                        "id": chap_id,
+                        "volume": vol_id,
+                        "url": self.absolute_url(a["href"]),
+                        "title": a.text.strip() or ("Chapter %d" % chap_id),
+                    }
+                )
 
     def download_chapter_body(self, chapter):
         soup = self.get_soup(chapter["url"])
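On the Wandering Inn side, the old loop synthesized volumes by cutting a new one every 100 chapters (vol_id = 1 + len(self.chapters) // 100); the new code instead walks the volume containers in the site's own table of contents. A standalone sketch of that grouping against a made-up TOC fragment (IDs and URLs are illustrative, and the inner selector here is simplified by dropping the div#table-of-contents prefix that the patch keeps):

```python
from bs4 import BeautifulSoup

# Made-up table-of-contents fragment in the volume-wise layout the updated
# crawler expects: one div per volume, chapters as .chapter-entry links.
html = """
<div id="table-of-contents">
  <div id="vol-1">
    <p class="chapter-entry"><a href="https://wanderinginn.com/1-00">1.00</a></p>
    <p class="chapter-entry"><a href="https://wanderinginn.com/1-01">1.01</a></p>
  </div>
  <div id="vol-2">
    <p class="chapter-entry"><a href="https://wanderinginn.com/2-00">2.00</a></p>
  </div>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
volumes, chapters = [], []
for volume in soup.select("div#table-of-contents div[id*='vol']"):
    vol_id = 1 + len(volumes)             # volumes numbered in document order
    volumes.append({"id": vol_id})
    for a in volume.select(".chapter-entry a[href*='wanderinginn']"):
        chapters.append(
            {
                "id": len(chapters) + 1,  # chapter ids run across volumes
                "volume": vol_id,
                "url": a["href"],
                "title": a.text.strip(),
            }
        )

print(volumes)                 # [{'id': 1}, {'id': 2}]
print(len(chapters))           # 3
```

The href filter on "wanderinginn" serves the same purpose as before: it keeps external links inside the TOC from being picked up as chapters.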
