artiomn · artiomn · Apr 27, 2024 · Apr 27, 2024 · Apr 27, 2024 · Apr 27, 2024
diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml
@@ -33,12 +33,12 @@ jobs:
       security-events: write
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           submodules: true
 
       - name: Run Pyre
-        uses: facebook/pyre-action@60697a7858f7cc8470d8cc494a3cf2ad6b06560d
+        uses: facebook/pyre-action@v0.0.2
         with:
           # To customize these inputs:
           # See https://github.com/facebook/pyre-action#inputs

diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -18,19 +18,18 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.9
+    - name: Set up Python
       uses: actions/setup-python@v1
       with:
-        python-version: 3.9
+        python-version: 3.12.3
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 mypy pytest pylint \
-          types-all types-attrs types-dataclasses types-PyYAML types-typed-ast
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-#    - name: MyPy types checking
-#      run: |
-#        mypy --config-file .mypy.ini
+        if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
+    - name: MyPy types checking
+      run: |
+        mypy --config-file .mypy.ini markdown_toolset
     - name: Lint with pylint
       run: |
         pylint -rn -sn --rcfile=.pylintrc --fail-on=I --load-plugins=pylint.extensions.docparams markdown_toolset

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -97,7 +97,8 @@ repos:
             "types-requests",
             "types-dataclasses>=0.1.3",
             "types-PyYAML",
-            "types-typed-ast>=1.4.1"
+            "types-typed-ast>=1.4.1",
+            "types-markdown>=3.6.0.20240316"
           ]
 #        exclude: tests(/\w*)*/functional/|tests/input|tests(/.*)+/conftest.py|doc/data/messages|tests(/\w*)*data/
 
@@ -110,7 +111,7 @@ repos:
     rev: "22.12.0"
     hooks:
       - id: black
-        args: ["-l", "120", "--skip-string-normalization"]
+        args: ["-l", "120", "--skip-string-normalization" ]
 
 #  - repo: https://github.com/DanielNoord/pydocstringformatter
 #    rev: v0.7.2

diff --git a/.pyre_configuration b/.pyre_configuration
@@ -0,0 +1,6 @@
+{
+  "source_directories": [
+    "."
+  ],
+  "exclude": ["install_git_hooks.py"]
+}
diff --git a/markdown_tool.py b/markdown_tool.py
@@ -12,8 +12,7 @@
 from mimetypes import types_map
 from pathlib import Path
 
-from markdown_toolset.article_processor import ArticleProcessor, DeduplicationVariant,\
-    IN_FORMATS_LIST, OUT_FORMATS_LIST
+from markdown_toolset.article_processor import ArticleProcessor, DeduplicationVariant, IN_FORMATS_LIST, OUT_FORMATS_LIST
 
 from markdown_toolset.__version__ import __version__
 
@@ -29,41 +28,44 @@ class CustomArgumentDefaultsHelpFormatter(RawDescriptionHelpFormatter):
     """
 
     def _get_help_string(self, action):
-        help = action.help
+        help_ = action.help
         if '%(default)' not in action.help:
             if action.default is not SUPPRESS:
                 defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
                 if action.option_strings or action.nargs in defaulting_nargs:
-                    help += ' (default: %(default)s)'
-        return help
+                    help_ += ' (default: %(default)s)'
+        return help_
 
 
 def main(arguments):
     """
     Entrypoint.
     """
 
-    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%d.%m.%Y %H:%M:%S',
-                        level='DEBUG' if arguments.verbose else 'INFO')
+    logging.basicConfig(
+        format='%(asctime)s %(message)s', datefmt='%d.%m.%Y %H:%M:%S', level='DEBUG' if arguments.verbose else 'INFO'
+    )
 
     print(f'Markdown tool version {__version__} started...')
 
     if arguments.process_local_images:
         print('--process_local_images is deprecated and will be disabled in the next version!')
 
-    processor = ArticleProcessor(article_file_path_or_url=arguments.article_file_path_or_url,
-                                 skip_list=arguments.skip_list,
-                                 downloading_timeout=arguments.downloading_timeout,
-                                 output_format=arguments.output_format,
-                                 output_path=getattr(arguments, 'output_path', Path.cwd()),
-                                 remove_source=arguments.remove_source,
-                                 images_public_path=getattr(arguments, 'images_public_path', ''),
-                                 input_formats=arguments.input_format.split('+'),
-                                 skip_all_incorrect=arguments.skip_all_incorrect,
-                                 download_incorrect_mime=arguments.download_incorrect_mime,
-                                 deduplication_type=getattr(DeduplicationVariant, arguments.deduplication_type.upper()),
-                                 images_dirname=arguments.images_dirname,
-                                 save_hierarchy=arguments.prepend_images_with_path)
+    processor = ArticleProcessor(
+        article_file_path_or_url=arguments.article_file_path_or_url,
+        skip_list=arguments.skip_list,
+        downloading_timeout=arguments.downloading_timeout,
+        output_format=arguments.output_format,
+        output_path=getattr(arguments, 'output_path', Path.cwd()),
+        remove_source=arguments.remove_source,
+        images_public_path=getattr(arguments, 'images_public_path', ''),
+        input_formats=arguments.input_format.split('+'),
+        skip_all_incorrect=arguments.skip_all_incorrect,
+        download_incorrect_mime=arguments.download_incorrect_mime,
+        deduplication_type=getattr(DeduplicationVariant, arguments.deduplication_type.upper()),
+        images_dirname=arguments.images_dirname,
+        save_hierarchy=arguments.prepend_images_with_path,
+    )
 
     processor.process()
 
@@ -76,42 +78,75 @@ def main(arguments):
         prog='markdown_tool',
         epilog='Use tool at your own risk!',
         description=f'{__doc__}Version: {__version__}',
-        formatter_class=CustomArgumentDefaultsHelpFormatter
+        formatter_class=CustomArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument('article_file_path_or_url', type=str, help='path to the article file in the Markdown format')
+    parser.add_argument(
+        '-D',
+        '--deduplication-type',
+        choices=[i.name.lower() for i in DeduplicationVariant],
+        default='disabled',
+        help='Deduplicate images, using content hash or SHA1(image_name)',
+    )
+    parser.add_argument(
+        '-d',
+        '--images-dirname',
+        default='images',
+        help='Folder in which to download images ' '(possible variables: $article_name, $time, $date, $dt, $base_url)',
+    )
+    parser.add_argument(
+        '-a', '--skip-all-incorrect', default=False, action='store_true', help='skip all incorrect images'
+    )
+    parser.add_argument(
+        '-E',
+        '--download-incorrect-mime',
+        default=False,
+        action='store_true',
+        help='download "images" with unrecognized MIME type',
+    )
+    parser.add_argument(
+        '-s',
+        '--skip-list',
+        default=None,
+        help='skip URL\'s from the comma-separated list (or file with a leading \'@\')',
+    )
+    parser.add_argument('-i', '--input-format', default='md', choices=IN_FORMATS_LIST, help='input format')
+    parser.add_argument(
+        '-l', '--process-local-images', default=False, action='store_true', help='[DEPRECATED] Process local images'
+    )
+    parser.add_argument(
+        '-n',
+        '--replace-image-names',
+        default=False,
+        action='store_true',
+        help='Replace image names, using content hash',
+    )
+    parser.add_argument(
+        '-o', '--output-format', default=OUT_FORMATS_LIST[0], choices=OUT_FORMATS_LIST, help='output format'
+    )
+    parser.add_argument(
+        '-p',
+        '--images-public-path',
+        default=SUPPRESS,
+        help='Public path to the folder of downloaded images '
+        '(possible variables: $article_name, $time, $date, $dt, $base_url)',
     )
-    parser.add_argument('article_file_path_or_url', type=str,
-                        help='path to the article file in the Markdown format')
-    parser.add_argument('-D', '--deduplication-type', choices=[i.name.lower() for i in DeduplicationVariant],
-                        default='disabled', help='Deduplicate images, using content hash or SHA1(image_name)')
-    parser.add_argument('-d', '--images-dirname', default='images',
-                        help='Folder in which to download images '
-                             '(possible variables: $article_name, $time, $date, $dt, $base_url)')
-    parser.add_argument('-a', '--skip-all-incorrect', default=False, action='store_true',
-                        help='skip all incorrect images')
-    parser.add_argument('-E', '--download-incorrect-mime', default=False, action='store_true',
-                        help='download "images" with unrecognized MIME type')
-    parser.add_argument('-s', '--skip-list', default=None,
-                        help='skip URL\'s from the comma-separated list (or file with a leading \'@\')')
-    parser.add_argument('-i', '--input-format', default='md', choices=IN_FORMATS_LIST,
-                        help='input format')
-    parser.add_argument('-l', '--process-local-images', default=False, action='store_true',
-                        help='[DEPRECATED] Process local images')
-    parser.add_argument('-n', '--replace-image-names', default=False, action='store_true',
-                        help='Replace image names, using content hash')
-    parser.add_argument('-o', '--output-format', default=OUT_FORMATS_LIST[0], choices=OUT_FORMATS_LIST,
-                        help='output format')
-    parser.add_argument('-p', '--images-public-path', default=SUPPRESS,
-                        help='Public path to the folder of downloaded images '
-                             '(possible variables: $article_name, $time, $date, $dt, $base_url)')
     # TODO: Replace this with variables.
-    parser.add_argument('-P', '--prepend-images-with-path', default=False, action='store_true',
-                        help='Save relative images paths')
-    parser.add_argument('-R', '--remove-source', default=False, action='store_true',
-                        help='Remove or replace source file')
-    parser.add_argument('-t', '--downloading-timeout', type=float, default=-1,
-                        help='how many seconds to wait before downloading will be failed')
+    parser.add_argument(
+        '-P', '--prepend-images-with-path', default=False, action='store_true', help='Save relative images paths'
+    )
+    parser.add_argument(
+        '-R', '--remove-source', default=False, action='store_true', help='Remove or replace source file'
+    )
+    parser.add_argument(
+        '-t',
+        '--downloading-timeout',
+        type=float,
+        default=-1,
+        help='how many seconds to wait before downloading will be failed',
+    )
     parser.add_argument('-O', '--output-path', type=str, help='article output file name or path', default=SUPPRESS)
-    parser.add_argument('--verbose', '-v', default=False, action='store_true',
-                        help='More verbose logging')
+    parser.add_argument('--verbose', '-v', default=False, action='store_true', help='More verbose logging')
     parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help='return version number')
 
     args = parser.parse_args()

diff --git a/markdown_toolset/string_tools.py b/markdown_toolset/string_tools.py
@@ -3,7 +3,7 @@
 import re
 import unicodedata
 from pathlib import Path
-from typing import BinaryIO, Union
+from typing import BinaryIO, Union, TextIO, List, Dict
 
 
 def slugify(value):
@@ -37,3 +37,15 @@ def compare_files(filename1: Union[Path, str], filename2: Union[Path, str]) -> b
     with open(filename1, 'rb') as f1:
         with open(filename2, 'rb') as f2:
             return is_binary_same(f1, f2)
+
+
+def replace_strings(replacement_mapping: Dict[str, str], text_stream: TextIO) -> List[str]:
+    """Return the lines of text_stream with every key of replacement_mapping replaced by its str() value."""
+
+    lines = []
+    for line in text_stream:  # reads from the stream's current position
+        for src, target in replacement_mapping.items():  # applied in order; later keys see earlier replacements
+            line = line.replace(src, str(target))  # str() lets non-str targets through at runtime
+        lines.append(line)
+
+    return lines
diff --git a/markdown_toolset/transformers/html/transformer.py b/markdown_toolset/transformers/html/transformer.py
@@ -8,6 +8,8 @@
 
 __all__ = ['ArticleTransformer']
 
+from ...string_tools import replace_strings
+
 
 class HTMLImageURLGrabber(HTMLParser, ABC):
     def __init__(self):
@@ -53,15 +55,8 @@ def _read_article(self) -> List[str]:
 
     def _fix_document_urls(self) -> List[str]:
         logging.debug('Replacing images urls in the document...')
-        replacement_mapping = self._replacement_mapping
-        lines = []
         self._article_stream.seek(self._start_pos)
-        for line in self._article_stream:
-            for src, target in replacement_mapping.items():
-                line = line.replace(src, str(target))
-            lines.append(line)
-
-        return lines
+        return replace_strings(self._replacement_mapping, self._article_stream)
 
     def run(self):
         """

diff --git a/markdown_toolset/transformers/md/transformer.py b/markdown_toolset/transformers/md/transformer.py
@@ -9,9 +9,11 @@
 from markdown.treeprocessors import Treeprocessor
 from markdown.extensions import Extension
 
-__all__ = ['ArticleTransformer']
+from ...image_downloader import ImageLink
+from ...string_tools import replace_strings
+
 
-from markdown_toolset.image_downloader import ImageLink
+__all__ = ['ArticleTransformer']
 
 
 class ImgExtractor(Treeprocessor):
@@ -82,12 +84,5 @@ def fix_n(n):
 
     def _fix_document_urls(self) -> List[str]:
         logging.debug('Replacing images urls in the document...')
-        replacement_mapping = self._replacement_mapping
-        lines = []
         self._article_stream.seek(self._start_pos)
-        for line in self._article_stream:
-            for src, target in replacement_mapping.items():
-                line = line.replace(src, str(target))
-            lines.append(line)
-
-        return lines
+        return replace_strings(self._replacement_mapping, self._article_stream)
diff --git a/markdown_toolset/www_tools.py b/markdown_toolset/www_tools.py
@@ -5,7 +5,6 @@
 
 from typing import Optional
 from mimetypes import guess_extension
-import os
 import re
 from urllib.parse import urlparse, urlunparse
 import requests
@@ -23,7 +22,6 @@ def is_url(url: str, allowed_url_prefixes=('http', 'ftp', 'https', 'ftps')) -> b
     """
     Check url for prefix match.
     """
-
     l_url = url.lower()
     for prefix in set(allowed_url_prefixes):
         if l_url.startswith(prefix.lower()):
@@ -43,6 +41,7 @@ def remove_protocol_prefix(url: str) -> str:
 def download_from_url(url: str, timeout: float = None):
     """
     Download file from the URL.
+
     :param url: URL to download.
     :param timeout: timeout before fail.
     :raise OSError: when HTTP status is not 200.
@@ -91,7 +90,10 @@ def get_filename_from_url(req: requests.Response) -> Optional[str]:
 
         result = file_name[0]
 
-    f_name, f_ext = os.path.splitext(result)
+    f_name, f_ext = (
+        (name_and_ext := result.rsplit('.', 1)),
+        (*name_and_ext, None) if len(name_and_ext) == 1 else name_and_ext,
+    )[1:][0]
 
     if f_name == '':
         return None
@@ -109,7 +111,6 @@ def get_base_url(req: requests.Response) -> Optional[str]:
     """
     Get base URL from url.
     """
-
     if req and req.url.find('/'):
         return req.url.rsplit('/', 1)[0]
 

diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -1 +1,11 @@
+flake8
+mypy
 pre-commit==2.20.0
+pylint
+pytest
+types-all
+types-attrs
+types-dataclasses
+types-markdown
+types-PyYAML
+types-typed-ast