From 52b591caaab7cd89f4c7a18ffaee4b06277f7e0e Mon Sep 17 00:00:00 2001 From: Bohdan Bobrowski Date: Thu, 5 Sep 2024 15:54:10 +0200 Subject: [PATCH] generate_epub removal from all crawlers! --- blog2epub/blog2epub_gui.py | 11 ---------- blog2epub/common/book.py | 2 +- blog2epub/common/settings.py | 3 +-- blog2epub/crawlers/abstract.py | 12 +---------- blog2epub/crawlers/default.py | 30 ++------------------------- blog2epub/crawlers/nrdblog_cmosnet.py | 3 --- blog2epub/crawlers/universal.py | 3 --- blog2epub/crawlers/zeissikonveb.py | 3 --- 8 files changed, 5 insertions(+), 62 deletions(-) diff --git a/blog2epub/blog2epub_gui.py b/blog2epub/blog2epub_gui.py index 31a5c6e..fc7105f 100644 --- a/blog2epub/blog2epub_gui.py +++ b/blog2epub/blog2epub_gui.py @@ -8,7 +8,6 @@ import time from datetime import datetime from itertools import cycle -from pathlib import Path from threading import Thread from typing import List from urllib import parse @@ -56,16 +55,6 @@ now = datetime.now() date_time = now.strftime("%Y-%m-%d[%H.%M.%S]") -logging_filename = os.path.join( - str(Path.home()), ".blog2epub", f"blog2epub_{date_time}.log" -) - -logging.basicConfig( - filename=logging_filename, - encoding="utf-8", - level=logging.DEBUG, - format="%(asctime)s - %(levelname)s - %(message)s", -) class UrlTextInput(MDTextField): diff --git a/blog2epub/common/book.py b/blog2epub/common/book.py index ad5823e..255f6e3 100755 --- a/blog2epub/common/book.py +++ b/blog2epub/common/book.py @@ -89,7 +89,7 @@ def __init__( self._set_locale() self.chapters: List[Chapter] = [] self.table_of_contents: List[EpubHtml] = [] - self.file_name_prefix: str = book_data.file_name_prefix + self.file_name_prefix: Optional[str] = book_data.file_name_prefix self.file_name: str = self._get_new_file_name() self.destination_folder = destination_folder diff --git a/blog2epub/common/settings.py b/blog2epub/common/settings.py index a0037aa..b809d2a 100644 --- a/blog2epub/common/settings.py +++ b/blog2epub/common/settings.py @@ -1,5 +1,4 @@ import os -from pathlib import Path from typing import Optional import yaml @@ -8,7 +7,7 @@ class Blog2EpubSettings: - def __init__(self, path: Optional[str]): + def __init__(self, path: str): self.path = path self._prepare_path() self.settings_file = os.path.join(self.path, "blog2epub.yml") diff --git a/blog2epub/crawlers/abstract.py b/blog2epub/crawlers/abstract.py index a44cb20..f6b2518 100644 --- a/blog2epub/crawlers/abstract.py +++ b/blog2epub/crawlers/abstract.py @@ -2,7 +2,6 @@ import os from abc import ABC, abstractmethod from datetime import datetime -from pathlib import Path from typing import List, Optional, Dict import re @@ -14,7 +13,7 @@ from blog2epub.common.book import Book from blog2epub.common.interfaces import EmptyInterface from blog2epub.common.language_tools import translate_month -from blog2epub.models.book import ArticleModel, DirModel +from blog2epub.models.book import DirModel from blog2epub.models.configuration import ConfigurationModel from blog2epub.common.crawler import ( prepare_file_name, @@ -80,15 +79,6 @@ def __init__( def crawl(self): pass - @abstractmethod - def generate_ebook( - self, - articles: List[ArticleModel], - destination_folder: str = ".", - file_name: Optional[str] = None, - ): # TODO: this should be removed I guess crawler's job is just to prepare data - pass - class Article: """ diff --git a/blog2epub/crawlers/default.py b/blog2epub/crawlers/default.py index 9017120..a0defe5 100644 --- a/blog2epub/crawlers/default.py +++ b/blog2epub/crawlers/default.py @@ -8,9 +8,7 @@ from pydantic import HttpUrl from blog2epub.crawlers.abstract import AbstractCrawler -from blog2epub.common.book import Book from blog2epub.models.book import BookModel, DirModel, ArticleModel, ImageModel -from blog2epub.models.configuration import ConfigurationModel class DefaultCrawler(AbstractCrawler): @@ -198,7 +196,8 @@ def _articles_loop(self, content): ): art.process() self.images = self.images + art.images - self.interface.print(str(len(self.articles) + 1) + ". " + art.title) + art_no = str(len(self.articles) + 1) + self.interface.print(f"{art_no}. {art.title}") if self.start: self.end = art.date else: @@ -240,28 +239,3 @@ def crawl(self): self._check_limit() self.active = False self.subtitle = self._get_subtitle() - - def generate_ebook( - self, - articles: List[ArticleModel], - destination_folder: str = ".", - file_name: Optional[str] = None, - ): - if articles: - self.book = Book( - book_data=self, - configuration=ConfigurationModel( - language=self.language or "en", - ), - interface=self.interface, - destination_folder=destination_folder, - ) - self.book.save( - articles=articles, - destination_folder=destination_folder, - file_name=file_name, - ) - return True - else: - self.interface.print("No articles found.") - return False diff --git a/blog2epub/crawlers/nrdblog_cmosnet.py b/blog2epub/crawlers/nrdblog_cmosnet.py index d0a7be1..ae35050 100644 --- a/blog2epub/crawlers/nrdblog_cmosnet.py +++ b/blog2epub/crawlers/nrdblog_cmosnet.py @@ -6,6 +6,3 @@ class NrdblogCmosEuCrawler(AbstractCrawler): def crawl(self): pass - - def generate_ebook(self, **kwargs): - pass diff --git a/blog2epub/crawlers/universal.py b/blog2epub/crawlers/universal.py index 797b0fb..ff78ca1 100644 --- a/blog2epub/crawlers/universal.py +++ b/blog2epub/crawlers/universal.py @@ -19,6 +19,3 @@ def _get_sitemap_xml(self): def crawl(self): pass - - def generate_ebook(self, **kwargs): - pass diff --git a/blog2epub/crawlers/zeissikonveb.py b/blog2epub/crawlers/zeissikonveb.py index e1cb3a9..66ead0e 100644 --- a/blog2epub/crawlers/zeissikonveb.py +++ b/blog2epub/crawlers/zeissikonveb.py @@ -6,6 +6,3 @@ class ZeissIkonVEBCrawler(AbstractCrawler): def crawl(self): pass - - def generate_ebook(self, **kwargs): - pass