Skip to content

Commit

Permalink
unittesting and some other fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
bohdanbobrowski committed Oct 31, 2024
1 parent 6390dfd commit 55fadd3
Show file tree
Hide file tree
Showing 10 changed files with 205 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/code_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ jobs:
- name: Static code analysis with mypy
run: |
poetry run mypy .
- name: Unit testing
run: |
poetry run pytest ./tests
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ _site
/.jekyll-cache/
/.sass-cache/
/vendor/
.bundle/
.bundle/
/htmlcov
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ Before you start, you'll need to install buildozer following this [installation
poetry run blog2epub poznanskiehistorie.blogspot.com -q=100
poetry run blog2epub classicameras.blogspot.com --limit=10 --no-images

## Running tests

pytest ./tests
pytest --cov=blog2epub ./tests
pytest --cov=blog2epub --cov-report=html ./tests


## Current version

### [v1.4.0]
Expand All @@ -121,12 +128,12 @@ Before you start, you'll need to install buildozer following this [installation
- [X] mypy and ruff pipeline job (via github Actions)
- [X] Android build
- [ ] unit testing - at least for some part of the code
- [ ] crawlers refactor - some part is done
- [x] crawlers refactor - some part is done
- [ ] fix minor Android bugs


[» Complete Change Log here «](https://github.com/bohdanbobrowski/blog2epub/blob/master/CHANGELOG.md)

## Project backlog

And finally, a list of known bugs and future plans for some new functions and enhancements: [BACKLOG.md](https://github.com/bohdanbobrowski/blog2epub/blob/master/BACKLOG.md)
And finally, a list of known bugs and future plans for some new functions and enhancements: [BACKLOG.md](https://github.com/bohdanbobrowski/blog2epub/blob/master/BACKLOG.md)
1 change: 1 addition & 0 deletions blog2epub/blog2epub_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ def success(self, ebook: Book):
success_content.add_widget(epub_cover_image_widget)

def open_ebook_in_default_viewer(inst):
self.interface.print(f"Opening file: {ebook.file_full_path} ({platform})")
if platform == "win":
os.startfile(ebook.file_full_path)
elif platform == "android":
Expand Down
11 changes: 8 additions & 3 deletions blog2epub/common/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@
import ssl
from urllib.error import URLError


from blog2epub.common.exceptions import BadUrlException

ssl._create_default_https_context = ssl._create_stdlib_context # type: ignore


def prepare_url(url: str) -> str:
    """Reduce a blog URL to its host part.

    The ``http:`` / ``https:`` scheme markers are removed, surrounding
    slashes are stripped, and everything after the first remaining ``/``
    (i.e. any path) is dropped.

    Args:
        url: A bare domain or a full URL, with or without a path.

    Returns:
        The first slash-separated segment left after the scheme is removed,
        e.g. ``"test.blogspot.com"`` for
        ``"https://test.blogspot.com/sub-category/name.html"``.
    """
    # NOTE: the scheme markers are removed wherever they occur in the string,
    # not only when they appear as a prefix.
    result = url.replace("http:", "").replace("https:", "").strip("/")
    # Keep only the host: drop any path that follows it.
    return result.split("/")[0]


def prepare_file_name(file_name: str | None, url: str) -> str:
if file_name:
return file_name
return url.replace("/", "_")
result = url.lower()
for x in ["/", ",", "."]:
result = result.replace(x, "_")
return result


def prepare_url_to_crawl(url: str) -> str:
Expand All @@ -25,7 +30,7 @@ def prepare_url_to_crawl(url: str) -> str:
raise BadUrlException


def prepare_port(url: str) -> int:
    """Return the port implied by the scheme of *url*.

    Args:
        url: URL to inspect.

    Returns:
        ``443`` when *url* starts with ``https://``, otherwise ``80``.
    """
    if url.startswith("https://"):
        return 443
    return 80
4 changes: 3 additions & 1 deletion blog2epub/crawlers/wordpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ def _atom_feed_loop(self):
self.images = self.images + art.images
self.articles.append(art)
self._add_tags(art.tags)
if self.limit and len(self.articles) >= self.limit:
if self.configuration.limit and len(self.articles) >= int(
self.configuration.limit
):
next_page = None
break
if next_page:
Expand Down
97 changes: 96 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ types-pyinstaller = "^6.10.0.20240812"
types-python-dateutil = "^2.9.0.20240821"
types-requests = "^2.32.0.20240712"
cython = "^3.0.11"
pytest-cov = "^6.0.0"

[tool.poetry.scripts]
blog2epub = "blog2epub.blog2epub_cli:main"
Expand Down
79 changes: 79 additions & 0 deletions tests/unit/blog2epub/common/test_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import unittest
from unittest.mock import patch

from blog2epub.common.crawler import (
prepare_url,
prepare_file_name,
prepare_url_to_crawl,
prepare_port,
)


class MockRequestResult:
    """Minimal context-manager stand-in for a response object exposing ``geturl()``."""

    def __init__(self, url):
        # URL that ``geturl()`` will report back.
        self.url = url

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        # Must be falsy: a truthy return value (the original returned
        # ``self``) would swallow any exception raised inside the ``with``
        # body and hide test failures.
        return False

    def geturl(self):
        """Return the URL this mock was created with."""
        return self.url


class TestCommonCrawler(unittest.TestCase):
    """Unit tests for the helper functions imported from blog2epub.common.crawler."""

    def setUp(self):
        # Given: one host written as a bare domain and with each scheme.
        self.given_domain = "example.com"
        self.given_http_url = "http://example.com"
        self.given_https_url = "https://example.com"

    def test_prepare_url(self):
        # When: every spelling of the host is normalised.
        results = [
            prepare_url(given)
            for given in (
                self.given_domain,
                self.given_http_url,
                self.given_https_url,
            )
        ]
        # Then: each one collapses to the bare domain.
        for result in results:
            assert result == self.given_domain

    def test_prepare_url_always_subdomain_for_blogspot_and_wordpress_com(self):
        # When: article URLs (with a path) are normalised.
        blogspot_result = prepare_url(
            "https://test.blogspot.com/sub-category/name.html"
        )
        wordpress_result = prepare_url(
            "https://test.wordpress.com/sub-category/very-interesting-article.html"
        )
        # Then: only the host remains.
        assert blogspot_result == "test.blogspot.com"
        assert wordpress_result == "test.wordpress.com"

    def test_prepare_file_name(self):
        # When: no explicit name is given / an explicit name is given.
        derived_name = prepare_file_name("", self.given_domain)
        explicit_name = prepare_file_name("xxx", self.given_domain)
        # Then: the name is derived from the URL / passed through untouched.
        assert derived_name == "example_com"
        assert explicit_name == "xxx"

    @patch("urllib.request.urlopen")
    def test_prepare_url_to_crawl(self, mock_urlopen):
        # Given: urlopen yields a response whose final URL is "ddd".
        mock_urlopen.return_value = MockRequestResult("ddd")
        # When:
        result = prepare_url_to_crawl(self.given_domain)
        # Then: urlopen was hit exactly once, with the https form of the domain.
        assert mock_urlopen.called
        assert mock_urlopen.call_count == 1
        first_call = mock_urlopen.call_args_list[0]
        assert first_call.args == (self.given_https_url,)
        assert result == "ddd"

    def test_prepare_port(self):
        # When:
        port_for_http = prepare_port(self.given_http_url)
        port_for_https = prepare_port(self.given_https_url)
        # Then:
        assert port_for_http == 80
        assert port_for_https == 443
9 changes: 3 additions & 6 deletions tests/unit/blog2epub/crawlers/test_universal.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
import unittest

# import pytest
from blog2epub.crawlers import UniversalCrawler


class TestUniversalCrawler(unittest.TestCase):
def setUp(self):
self.crawler = UniversalCrawler()

def test_robots(self):
# Given
# with open("tests/unit/blog2epub/crawlers/data/robots-1.txt") as f:
# given_robots = f.read()
sitemaps = self.crawler._get_sitemaps()
# sitemaps = self.crawler._get_sitemaps()
# When
# Then
print(sitemaps)
# print(sitemaps)
pass

def test_sitemap_1(self):
# Given
Expand Down

0 comments on commit 55fadd3

Please sign in to comment.