unittesting and some other fixes

bohdanbobrowski · Oct 31, 2024 · 55fadd3 · 55fadd3
1 parent 6390dfd
commit 55fadd3
Show file tree

Hide file tree

Showing 10 changed files with 205 additions and 14 deletions.
diff --git a/.github/workflows/code_check.yml b/.github/workflows/code_check.yml
@@ -25,3 +25,6 @@ jobs:
     - name: Static code analysys with mypy
       run: |
         poetry run mypy . 
+    - name: Unit testing
+      run: |
+        poetry run pytest ./tests 
diff --git a/.gitignore b/.gitignore
@@ -15,4 +15,5 @@ _site
 /.jekyll-cache/
 /.sass-cache/
 /vendor/
-.bundle/
+.bundle/
+/htmlcov
diff --git a/README.md b/README.md
@@ -113,6 +113,13 @@ Before you start, you'll need to install buildozer following this [installation
     poetry run blog2epub poznanskiehistorie.blogspot.com -q=100
     poetry run blog2epub classicameras.blogspot.com --limit=10 --no-images
 
+## Running tests
+
+    poetry run pytest ./tests
+    poetry run pytest --cov=blog2epub ./tests
+    poetry run pytest --cov=blog2epub --cov-report=html ./tests
+
+
 ## Current version
 
 ### [v1.4.0]
@@ -121,12 +128,12 @@ Before you start, you'll need to install buildozer following this [installation
 - [X] mypy and ruff pipeline job (via github Actions)
 - [X] Android build
 - [ ] unit testing - at least for some part of the code
-- [ ] crawlers refactor - some part is done
+- [x] crawlers refactor - some part is done
 - [ ] fix minor Android bugs
 
 
 [&raquo; Complete Change Log here &laquo;](https://github.com/bohdanbobrowski/blog2epub/blob/master/CHANGELOG.md)
 
 ## Project backlog
 
-And finally, a list known bugs and future plans for some new functions and enhancements: [BACKLOG.md](https://github.com/bohdanbobrowski/blog2epub/blob/master/BACKLOG.md)
+And finally, a list of known bugs and future plans for some new functions and enhancements: [BACKLOG.md](https://github.com/bohdanbobrowski/blog2epub/blob/master/BACKLOG.md)
diff --git a/blog2epub/blog2epub_gui.py b/blog2epub/blog2epub_gui.py
@@ -581,6 +581,7 @@ def success(self, ebook: Book):
         success_content.add_widget(epub_cover_image_widget)
 
         def open_ebook_in_default_viewer(inst):
+            self.interface.print(f"Opening file: {ebook.file_full_path} ({platform})")
             if platform == "win":
                 os.startfile(ebook.file_full_path)
             elif platform == "android":

diff --git a/blog2epub/common/crawler.py b/blog2epub/common/crawler.py
@@ -2,19 +2,24 @@
 import ssl
 from urllib.error import URLError
 
+
 from blog2epub.common.exceptions import BadUrlException
 
 ssl._create_default_https_context = ssl._create_stdlib_context  # type: ignore
 
 
 def prepare_url(url: str) -> str:
-    return url.replace("http:", "").replace("https:", "").strip("/")
+    result = url.replace("http:", "").replace("https:", "").strip("/")
+    return result.split("/")[0]
 
 
 def prepare_file_name(file_name: str | None, url: str) -> str:
     if file_name:
         return file_name
-    return url.replace("/", "_")
+    result = url.lower()
+    for x in ["/", ",", "."]:
+        result = result.replace(x, "_")
+    return result
 
 
 def prepare_url_to_crawl(url: str) -> str:
@@ -25,7 +30,7 @@ def prepare_url_to_crawl(url: str) -> str:
         raise BadUrlException
 
 
-def prepare_port(url):
+def prepare_port(url: str) -> int:
     if url.startswith("https://"):
         return 443
     return 80
diff --git a/blog2epub/crawlers/wordpress.py b/blog2epub/crawlers/wordpress.py
@@ -58,7 +58,9 @@ def _atom_feed_loop(self):
                 self.images = self.images + art.images
                 self.articles.append(art)
                 self._add_tags(art.tags)
-                if self.limit and len(self.articles) >= self.limit:
+                if self.configuration.limit and len(self.articles) >= int(
+                    self.configuration.limit
+                ):
                     next_page = None
                     break
             if next_page:

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -37,6 +37,7 @@ types-pyinstaller = "^6.10.0.20240812"
 types-python-dateutil = "^2.9.0.20240821"
 types-requests = "^2.32.0.20240712"
 cython = "^3.0.11"
+pytest-cov = "^6.0.0"
 
 [tool.poetry.scripts]
 blog2epub = "blog2epub.blog2epub_cli:main"

diff --git a/tests/unit/blog2epub/common/test_crawler.py b/tests/unit/blog2epub/common/test_crawler.py
@@ -0,0 +1,79 @@
+import unittest
+from unittest.mock import patch
+
+from blog2epub.common.crawler import (
+    prepare_url,
+    prepare_file_name,
+    prepare_url_to_crawl,
+    prepare_port,
+)
+
+
+class MockRequestResult:
+    def __init__(self, url):
+        self.url = url
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        return self
+
+    def geturl(self):
+        return self.url
+
+
+class TestCommonCrawler(unittest.TestCase):
+    def setUp(self):
+        # Given:
+        self.given_domain = "example.com"
+        self.given_http_url = "http://example.com"
+        self.given_https_url = "https://example.com"
+
+    def test_prepare_url(self):
+        # When:
+        result_0 = prepare_url(self.given_domain)
+        result_1 = prepare_url(self.given_http_url)
+        result_2 = prepare_url(self.given_https_url)
+        # Then:
+        assert result_0 == self.given_domain
+        assert result_1 == self.given_domain
+        assert result_2 == self.given_domain
+
+    def test_prepare_url_always_subdomain_for_blogspot_and_wordpress_com(self):
+        # When
+        result_1 = prepare_url("https://test.blogspot.com/sub-category/name.html")
+        result_2 = prepare_url(
+            "https://test.wordpress.com/sub-category/very-interesting-article.html"
+        )
+        # Then
+        assert result_1 == "test.blogspot.com"
+        assert result_2 == "test.wordpress.com"
+
+    def test_prepare_file_name(self):
+        # When:
+        result_1 = prepare_file_name("", self.given_domain)
+        result_2 = prepare_file_name("xxx", self.given_domain)
+        # Then:
+        assert result_1 == "example_com"
+        assert result_2 == "xxx"
+
+    @patch("urllib.request.urlopen")
+    def test_prepare_url_to_crawl(self, mock_urlopen):
+        # Given
+        mock_urlopen.return_value = MockRequestResult("ddd")
+        # When:
+        result = prepare_url_to_crawl(self.given_domain)
+        # Then:
+        assert mock_urlopen.called
+        assert mock_urlopen.call_count == 1
+        assert mock_urlopen.call_args_list[0].args == (self.given_https_url,)
+        assert result == "ddd"
+
+    def test_prepare_port(self):
+        # When:
+        http_result = prepare_port(self.given_http_url)
+        https_result = prepare_port(self.given_https_url)
+        # Then:
+        assert http_result == 80
+        assert https_result == 443
diff --git a/tests/unit/blog2epub/crawlers/test_universal.py b/tests/unit/blog2epub/crawlers/test_universal.py
@@ -1,21 +1,18 @@
 import unittest
 
 # import pytest
-from blog2epub.crawlers import UniversalCrawler
 
 
 class TestUniversalCrawler(unittest.TestCase):
-    def setUp(self):
-        self.crawler = UniversalCrawler()
-
     def test_robots(self):
         # Given
         # with open("tests/unit/blog2epub/crawlers/data/robots-1.txt") as f:
         #     given_robots = f.read()
-        sitemaps = self.crawler._get_sitemaps()
+        # sitemaps = self.crawler._get_sitemaps()
         # When
         # Then
-        print(sitemaps)
+        # print(sitemaps)
+        pass
 
     def test_sitemap_1(self):
         # Given