From 3f5f296b0d506b73222e3e5c9df6dbaeab9cb12e Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 5 Jun 2023 12:08:33 +0100 Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=94=A7=20Add=20typing=20to=20`Markdow?= =?UTF-8?q?nIt.add=5Frender=5Frule`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/main.py | 13 +++++++++++-- pyproject.toml | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/markdown_it/main.py b/markdown_it/main.py index bb294a99..9dc71adc 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -2,7 +2,7 @@ from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping from contextlib import contextmanager -from typing import Any, Literal, overload +from typing import Any, Literal, Sequence, overload from . import helpers, presets from .common import normalize_url, utils @@ -227,11 +227,20 @@ def reset_rules(self) -> Generator[None, None, None]: self.inline.ruler2.enableOnly(chain_rules["inline2"]) def add_render_rule( - self, name: str, function: Callable[..., Any], fmt: str = "html" + self, + name: str, + function: Callable[ + [RendererProtocol, Sequence[Token], int, OptionsDict, EnvType], str + ], + fmt: str = "html", ) -> None: """Add a rule for rendering a particular Token type. 
Only applied when ``renderer.__output__ == fmt`` + + :param name: the name of the token type + :param function: the function to call to render the token; + it should have the signature ``function(tokens, idx, options, env)`` """ if self.renderer.__output__ == fmt: self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore diff --git a/pyproject.toml b/pyproject.toml index ea7cd036..633eef2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ compare = [ linkify = ["linkify-it-py>=1,<3"] plugins = ["mdit-py-plugins"] rtd = [ - "mdit-py-plugins @ git+https://github.com/executablebooks/mdit-py-plugins@master", + "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", From 2dd286c9923437352a2dd785ac9182b4be853b17 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 5 Jun 2023 12:10:39 +0100 Subject: [PATCH 2/5] Update main.py --- markdown_it/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdown_it/main.py b/markdown_it/main.py index 9dc71adc..ee85b24f 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -240,7 +240,7 @@ def add_render_rule( :param name: the name of the token type :param function: the function to call to render the token; - it should have the signature ``function(tokens, idx, options, env)`` + it should have the signature ``function(renderer, tokens, idx, options, env)`` """ if self.renderer.__output__ == fmt: self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore From f0643a2315ed2264363d4df08d1f83fe021e82e3 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 5 Jun 2023 12:32:15 +0100 Subject: [PATCH 3/5] update --- markdown_it/main.py | 2 +- markdown_it/renderer.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/markdown_it/main.py b/markdown_it/main.py index ee85b24f..3baf9c7a 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -243,7 +243,7 @@ def add_render_rule( it should have the signature ``function(renderer, tokens, 
idx, options, env)`` """ if self.renderer.__output__ == fmt: - self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore + self.renderer.rules[name] = function.__get__(self.renderer) def use( self, plugin: Callable[..., None], *params: Any, **options: Any diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 7fee9ffa..15396824 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -9,7 +9,7 @@ class Renderer from collections.abc import Sequence import inspect -from typing import Any, ClassVar, Protocol +from typing import Any, Callable, ClassVar, MutableMapping, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token @@ -18,6 +18,10 @@ class Renderer class RendererProtocol(Protocol): __output__: ClassVar[str] + rules: MutableMapping[ + str, + Callable[[Sequence[Token], int, OptionsDict, EnvType], str], + ] def render( self, tokens: Sequence[Token], options: OptionsDict, env: EnvType From 615eb3f36ceb05f316d1e7920249d45685f8f316 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 5 Jun 2023 18:23:40 +0200 Subject: [PATCH 4/5] update --- markdown_it/renderer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 15396824..1438c4d0 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -9,12 +9,15 @@ class Renderer from collections.abc import Sequence import inspect -from typing import Any, Callable, ClassVar, MutableMapping, Protocol +from typing import TYPE_CHECKING, Any, Callable, ClassVar, MutableMapping, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token from .utils import EnvType, OptionsDict +if TYPE_CHECKING: + from markdown_it import MarkdownIt + class RendererProtocol(Protocol): __output__: ClassVar[str] @@ -28,6 +31,9 @@ def render( ) -> Any: ... 
+ # note container and admon plugins also expect renderToken to be defined, + # but it is unclear if this should be a requirement for all renderers + class RendererHTML(RendererProtocol): """Contains render rules for tokens. Can be updated and extended. @@ -61,7 +67,7 @@ def strong_close(self, tokens, idx, options, env): __output__ = "html" - def __init__(self, parser: Any = None): + def __init__(self, parser: None | MarkdownIt = None): self.rules = { k: v for k, v in inspect.getmembers(self, predicate=inspect.ismethod) From a43a87b583fe7628361d57364fcfac81a3b9db22 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Jun 2023 18:57:26 +0200 Subject: [PATCH 5/5] improve docstrings --- markdown_it/main.py | 2 +- markdown_it/parser_block.py | 2 +- markdown_it/parser_core.py | 2 +- markdown_it/parser_inline.py | 6 +- markdown_it/ruler.py | 11 +++- markdown_it/rules_block/state_block.py | 78 ++++++++++++++++-------- markdown_it/rules_inline/state_inline.py | 28 ++++++--- 7 files changed, 89 insertions(+), 40 deletions(-) diff --git a/markdown_it/main.py b/markdown_it/main.py index 3baf9c7a..ad33c9ab 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -144,7 +144,7 @@ def configure( return self def get_all_rules(self) -> dict[str, list[str]]: - """Return the names of all active rules.""" + """Return the names of all rules.""" rules = { chain: self[chain].ruler.get_all_rules() for chain in ["core", "block", "inline"] diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 72360f9b..de9b3b2b 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -57,7 +57,7 @@ def __init__(self) -> None: def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None: """Generate tokens for input range.""" - rules = self.ruler.getRules("") + rules = self.ruler.getRules() line = startLine maxNesting = state.md.options.maxNesting hasEmptyLines = False diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py 
index ca5ab256..82b628b7 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -41,5 +41,5 @@ def __init__(self) -> None: def process(self, state: StateCore) -> None: """Executes core chain rules.""" - for rule in self.ruler.getRules(""): + for rule in self.ruler.getRules(): rule(state) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 0026c383..d0c587a4 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -67,7 +67,7 @@ def skipToken(self, state: StateInline) -> None: """ ok = False pos = state.pos - rules = self.ruler.getRules("") + rules = self.ruler.getRules() maxNesting = state.md.options["maxNesting"] cache = state.cache @@ -106,7 +106,7 @@ def skipToken(self, state: StateInline) -> None: def tokenize(self, state: StateInline) -> None: """Generate tokens for input range.""" ok = False - rules = self.ruler.getRules("") + rules = self.ruler.getRules() end = state.posMax maxNesting = state.md.options["maxNesting"] @@ -141,7 +141,7 @@ def parse( """Process input string and push inline tokens into `tokens`""" state = StateInline(src, md, env, tokens) self.tokenize(state) - rules2 = self.ruler2.getRules("") + rules2 = self.ruler2.getRules() for rule in rules2: rule(state) return state.tokens diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index bd8baba3..a38a4037 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -59,6 +59,7 @@ def srcCharCode(self) -> tuple[int, ...]: class RuleOptionsType(TypedDict, total=False): alt: list[str] + """list of rules which can be terminated by this one.""" RuleFuncTv = TypeVar("RuleFuncTv") @@ -71,9 +72,12 @@ class Rule(Generic[RuleFuncTv]): enabled: bool fn: RuleFuncTv = field(repr=False) alt: list[str] + """list of rules which can be terminated by this one.""" class Ruler(Generic[RuleFuncTv]): + """Class to manage functions (rules) which identify syntax elements.""" + def __init__(self) -> None: # List of added rules. 
self.__rules__: list[Rule[RuleFuncTv]] = [] @@ -255,10 +259,13 @@ def disable( def getRules(self, chainName: str = "") -> list[RuleFuncTv]: """Return array of active functions (rules) for given chain name. + It analyzes rules configuration, compiles caches if not exists and returns result. - Default chain name is `''` (empty string). It can't be skipped. - That's done intentionally, to keep signature monomorphic for high speed. + :param chainName: name of chain to return rules for: + - The default `""` means all "top-level" rules for this ruler. + - A specific name can be used to fetch only rules which can terminate + the named rule (used for block level rules like paragraph, list, etc.) """ if self.__cache__ is None: diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 445ad265..ffcdf386 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -28,40 +28,68 @@ def __init__( self.tokens = tokens - self.bMarks: list[int] = [] # line begin offsets for fast jumps - self.eMarks: list[int] = [] # line end offsets for fast jumps - # offsets of the first non-space characters (tabs not expanded) + self.bMarks: list[int] = [] + """line begin offsets for fast jumps""" + + self.eMarks: list[int] = [] + """line end offsets for fast jumps""" + self.tShift: list[int] = [] - self.sCount: list[int] = [] # indents for each line (tabs expanded) + """Offsets of the first non-space characters (tabs not expanded)""" + + self.sCount: list[int] = [] + """indents for each line (tabs expanded)""" - # An amount of virtual spaces (tabs expanded) between beginning - # of each line (bMarks) and real beginning of that line. - # - # It exists only as a hack because blockquotes override bMarks - # losing information in the process. - # - # It's used only when expanding tabs, you can think about it as - # an initial tab length, e.g.
bsCount=21 applied to string `\t123` - # means first tab should be expanded to 4-21%4 === 3 spaces. - # self.bsCount: list[int] = [] + """ + An amount of virtual spaces (tabs expanded) between beginning + of each line (bMarks) and real beginning of that line. + + It exists only as a hack because blockquotes override bMarks + losing information in the process. + + It's used only when expanding tabs, you can think about it as + an initial tab length, e.g. `bsCount=21` applied to string `\\t123` + means first tab should be expanded to `4-21 % 4 == 3` spaces. + """ + # # block parser variables - self.blkIndent = 0 # required block content indent (for example, if we are - # inside a list, it would be positioned after list marker) - self.line = 0 # line index in src - self.lineMax = 0 # lines count - self.tight = False # loose/tight mode for lists - self.ddIndent = -1 # indent of the current dd block (-1 if there isn't any) - self.listIndent = -1 # indent of the current list block (-1 if there isn't any) - - # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference' - # used in lists to determine if they interrupt a paragraph + # + + self.blkIndent = 0 + """required block content indent + (for example, if we are inside a list, it would be positioned after list marker) + """ + + self.line = 0 + """line index in src""" + self.lineMax = 0 + """Total lines count""" + + self.tight = False + """loose/tight mode for lists""" + + self.ddIndent = -1 + """indent of the current dd block (-1 if there isn't any), + used only by deflist plugin + """ + + self.listIndent = -1 + """indent of the current list block (-1 if there isn't any)""" + self.parentType = "root" + """ + can be 'blockquote', 'list', 'root', 'paragraph' or 'reference' + used in lists to determine if they interrupt a paragraph + """ self.level = 0 + """Current nesting level of tokens, + +1 when adding opening token, -1 when adding closing token + """ - # renderer + # renderer (does not appear to be used) 
self.result = "" # Create caches diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index c0c491c4..0eb75ff2 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -51,28 +51,42 @@ def __init__( self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens) self.pos = 0 + """Current position in src string""" self.posMax = len(self.src) + """Length of the src string""" self.level = 0 + """Current nesting level of tokens, + +1 when adding opening token, -1 when adding closing token + """ self.pending = "" + """Accumulated text not yet converted to a token. + This will be added as a `text` token when the next token is pushed (before it), + or when the parser finishes running (after all other tokens). + """ self.pendingLevel = 0 + """The nesting level of the pending text""" - # Stores { start: end } pairs. Useful for backtrack - # optimization of pairs parse (emphasis, strikes). self.cache: dict[int, int] = {} + """ + Stores { start: end } pairs. + Useful for backtrack optimization of pairs parse (emphasis, strikes). + """ - # List of emphasis-like delimiters for current tag self.delimiters: list[Delimiter] = [] + """List of emphasis-like delimiters for current tag""" - # Stack of delimiter lists for upper level tags self._prev_delimiters: list[list[Delimiter]] = [] + """Stack of delimiter lists for upper level tags""" - # backticklength => last seen position self.backticks: dict[int, int] = {} + """backticklength => last seen position""" self.backticksScanned = False - # Counter used to disable inline linkify-it execution - # inside <a> and markdown links self.linkLevel = 0 + """ + Counter used to disable inline linkify-it execution + inside `<a>` and markdown links + """ def __repr__(self) -> str: return (