Add and improve type annotations

Python-Markdown · Nov 1, 2023 · 39935ed
1 parent 8e517de
commit 39935ed
Show file tree

Hide file tree

Showing 25 changed files with 319 additions and 238 deletions.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 * Fix type annotations for `convertFile` - it accepts only bytes-based buffers.
   Also remove legacy checks from Python 2 (#1400)
+* Improve and expand type annotations in the code base (#1401).
 
 ## [3.5.1] -- 2023-10-31
 

diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
@@ -171,14 +171,14 @@ def __init__(self, *args):
         super().__init__(*args)
         self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return block.startswith(' '*self.tab_length) and \
             not self.parser.state.isstate('detabbed') and \
             (parent.tag in self.ITEM_TYPES or
                 (len(parent) and parent[-1] is not None and
                     (parent[-1].tag in self.LIST_TYPES)))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         level, sibling = self.get_level(parent, block)
         block = self.looseDetab(block, level)
@@ -251,10 +251,10 @@ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Eleme
 class CodeBlockProcessor(BlockProcessor):
     """ Process code blocks. """
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return block.startswith(' '*self.tab_length)
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         sibling = self.lastChild(parent)
         block = blocks.pop(0)
         theRest = ''
@@ -286,10 +286,10 @@ class BlockQuoteProcessor(BlockProcessor):
 
     RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.search(block)) and not util.nearing_recursion_limit()
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
@@ -353,10 +353,10 @@ def __init__(self, parser: BlockParser):
         self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
                                     (self.tab_length, self.tab_length * 2 - 1))
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.match(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         # Check for multiple items in one block.
         items = self.get_items(blocks.pop(0))
         sibling = self.lastChild(parent)
@@ -460,10 +460,10 @@ class HashHeaderProcessor(BlockProcessor):
     # Detect a header at start of any line in block
     RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.search(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
@@ -491,10 +491,10 @@ class SetextHeaderProcessor(BlockProcessor):
     # Detect Setext-style header. Must be first 2 lines of block.
     RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.match(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         lines = blocks.pop(0).split('\n')
         # Determine level. `=` is 1 and `-` is 2.
         if lines[1].startswith('='):
@@ -517,15 +517,15 @@ class HRProcessor(BlockProcessor):
     # Detect hr on any line of a block.
     SEARCH_RE = re.compile(RE, re.MULTILINE)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         m = self.SEARCH_RE.search(block)
         if m:
             # Save match object on class instance so we can use it later.
             self.match = m
             return True
         return False
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         match = self.match
         # Check for lines in block before `hr`.
@@ -545,10 +545,10 @@ def run(self, parent, blocks):
 class EmptyBlockProcessor(BlockProcessor):
     """ Process blocks that are empty or start with an empty line. """
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return not block or block.startswith('\n')
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         filler = '\n\n'
         if block:
@@ -575,10 +575,10 @@ class ReferenceProcessor(BlockProcessor):
         r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
     )
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return True
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
@@ -601,10 +601,10 @@ def run(self, parent, blocks):
 class ParagraphProcessor(BlockProcessor):
     """ Process Paragraph blocks. """
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return True
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         if block.strip():
             # Not a blank block. Add to parent, otherwise throw it away.

diff --git a/markdown/core.py b/markdown/core.py
@@ -159,7 +159,7 @@ def build_parser(self) -> Markdown:
     def registerExtensions(
         self,
         extensions: Sequence[Extension | str],
-        configs: Mapping[str, Mapping[str, Any]]
+        configs: Mapping[str, dict[str, Any]]
     ) -> Markdown:
         """
         Load a list of extensions into an instance of the `Markdown` class.
@@ -491,8 +491,8 @@ def markdownFromFile(**kwargs: Any):
     [`convert`][markdown.Markdown.convert].
 
     Keyword arguments:
-        input (str | TextIO): A file name or readable object.
-        output (str | TextIO): A file name or writable object.
+        input (str | BinaryIO): A file name or readable object.
+        output (str | BinaryIO): A file name or writable object.
         encoding (str): Encoding of input and output.
         **kwargs: Any arguments accepted by the `Markdown` class.
 

diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py
@@ -27,7 +27,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Mapping, Sequence
+from typing import TYPE_CHECKING, Any, Iterable, Mapping
 from ..util import parseBoolValue
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any) -> None:
             value = parseBoolValue(value, preserve_none=True)
         self.config[key][0] = value
 
-    def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
+    def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]) -> None:
         """
         Loop through a collection of configuration options, passing each to
         [`setConfig`][markdown.extensions.Extension.setConfig].

diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
@@ -43,10 +43,10 @@ class AbbrPreprocessor(BlockProcessor):
 
     RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return True
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         """
         Find and remove all Abbreviation references from the text.
         Each reference is set as a new `AbbrPattern` in the markdown instance.
@@ -71,7 +71,7 @@ def run(self, parent, blocks):
         blocks.insert(0, block)
         return False
 
-    def _generate_pattern(self, text):
+    def _generate_pattern(self, text: str) -> str:
         """
         Given a string, returns an regex pattern to match that string.
 
@@ -90,11 +90,11 @@ def _generate_pattern(self, text):
 class AbbrInlineProcessor(InlineProcessor):
     """ Abbreviation inline pattern. """
 
-    def __init__(self, pattern, title):
+    def __init__(self, pattern: str, title: str):
         super().__init__(pattern)
         self.title = title
 
-    def handleMatch(self, m, data):
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
         abbr = etree.Element('abbr')
         abbr.text = AtomicString(m.group('abbr'))
         abbr.set('title', self.title)

diff --git a/markdown/extensions/admonition.py b/markdown/extensions/admonition.py
@@ -30,6 +30,10 @@
 from ..blockprocessors import BlockProcessor
 import xml.etree.ElementTree as etree
 import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:  # pragma: no cover
+    from markdown import blockparser
 
 
 class AdmonitionExtension(Extension):
@@ -49,15 +53,15 @@ class AdmonitionProcessor(BlockProcessor):
     RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
     RE_SPACES = re.compile('  +')
 
-    def __init__(self, parser):
+    def __init__(self, parser: blockparser.BlockParser):
         """Initialization."""
 
         super().__init__(parser)
 
-        self.current_sibling = None
-        self.content_indention = 0
+        self.current_sibling: etree.Element | None = None
+        self.content_indent = 0
 
-    def parse_content(self, parent, block):
+    def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]:
         """Get sibling admonition.
 
         Retrieve the appropriate sibling element. This can get tricky when
@@ -115,14 +119,14 @@ def parse_content(self, parent, block):
 
         return sibling, block, the_rest
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
 
         if self.RE.search(block):
             return True
         else:
             return self.parse_content(parent, block)[0] is not None
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         m = self.RE.search(block)
 
@@ -160,7 +164,7 @@ def run(self, parent, blocks):
             # list for future processing.
             blocks.insert(0, theRest)
 
-    def get_class_and_title(self, match):
+    def get_class_and_title(self, match: re.Match[str]) -> tuple[str, str | None]:
         klass, title = match.group(1).lower(), match.group(2)
         klass = self.RE_SPACES.sub(' ', klass)
         if title is None:

diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
@@ -86,7 +86,7 @@ class AttrListTreeprocessor(Treeprocessor):
                          r'\uf900-\ufdcf\ufdf0-\ufffd'
                          r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
 
-    def run(self, doc: Element):
+    def run(self, doc: Element) -> None:
         for elem in doc.iter():
             if self.md.is_block_level(elem.tag):
                 # Block level: check for `attrs` on last line of text

diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py
@@ -24,6 +24,11 @@
 from . import Extension
 from ..treeprocessors import Treeprocessor
 from ..util import parseBoolValue
+from typing import TYPE_CHECKING, Callable, Any
+
+if TYPE_CHECKING:  # pragma: no cover
+    from markdown import Markdown
+    import xml.etree.ElementTree as etree
 
 try:  # pragma: no cover
     from pygments import highlight
@@ -110,11 +115,11 @@ class CodeHilite:
 
     def __init__(self, src: str, **options):
         self.src = src
-        self.lang = options.pop('lang', None)
-        self.guess_lang = options.pop('guess_lang', True)
-        self.use_pygments = options.pop('use_pygments', True)
-        self.lang_prefix = options.pop('lang_prefix', 'language-')
-        self.pygments_formatter = options.pop('pygments_formatter', 'html')
+        self.lang: str | None = options.pop('lang', None)
+        self.guess_lang: bool = options.pop('guess_lang', True)
+        self.use_pygments: bool = options.pop('use_pygments', True)
+        self.lang_prefix: str = options.pop('lang_prefix', 'language-')
+        self.pygments_formatter: str | Callable = options.pop('pygments_formatter', 'html')
 
         if 'linenos' not in options:
             options['linenos'] = options.pop('linenums', None)
@@ -128,7 +133,7 @@ def __init__(self, src: str, **options):
 
         self.options = options
 
-    def hilite(self, shebang=True) -> str:
+    def hilite(self, shebang: bool = True) -> str:
         """
         Pass code to the [Pygments](https://pygments.org/) highlighter with
         optional line numbers. The output should then be styled with CSS to
@@ -187,7 +192,7 @@ def hilite(self, shebang=True) -> str:
                 txt
             )
 
-    def _parseHeader(self):
+    def _parseHeader(self) -> None:
         """
         Determines language of a code block from shebang line and whether the
         said line should be removed or left in place. If the shebang line
@@ -249,7 +254,10 @@ def _parseHeader(self):
 class HiliteTreeprocessor(Treeprocessor):
     """ Highlight source code in code blocks. """
 
-    def code_unescape(self, text):
+    config: dict[str, Any]
+    md: Markdown
+
+    def code_unescape(self, text: str) -> str:
         """Unescape code."""
         text = text.replace("&lt;", "<")
         text = text.replace("&gt;", ">")
@@ -258,7 +266,7 @@ def code_unescape(self, text):
         text = text.replace("&amp;", "&")
         return text
 
-    def run(self, root):
+    def run(self, root: etree.Element) -> None:
         """ Find code blocks and store in `htmlStash`. """
         blocks = root.iter('pre')
         for block in blocks:

diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py
@@ -33,10 +33,10 @@ class DefListProcessor(BlockProcessor):
     RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)')
     NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]')
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.search(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
 
         raw_block = blocks.pop(0)
         m = self.RE.search(raw_block)
@@ -88,6 +88,7 @@ def run(self, parent, blocks):
 
         if theRest:
             blocks.insert(0, theRest)
+        return None
 
 
 class DefListIndentProcessor(ListIndentProcessor):
@@ -99,7 +100,7 @@ class DefListIndentProcessor(ListIndentProcessor):
     LIST_TYPES = ['dl', 'ol', 'ul']
     """ Include `dl` is list types. """
 
-    def create_item(self, parent, block):
+    def create_item(self, parent: etree.Element, block: str) -> None:
         """ Create a new `dd` or `li` (depending on parent) and parse the block with it as the parent. """
 
         dd = etree.SubElement(parent, 'dd')