diff --git a/markdown_it/main.py b/markdown_it/main.py index bb294a99..ad33c9ab 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -2,7 +2,7 @@ from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping from contextlib import contextmanager -from typing import Any, Literal, overload +from typing import Any, Literal, Sequence, overload from . import helpers, presets from .common import normalize_url, utils @@ -144,7 +144,7 @@ def configure( return self def get_all_rules(self) -> dict[str, list[str]]: - """Return the names of all active rules.""" + """Return the names of all rules.""" rules = { chain: self[chain].ruler.get_all_rules() for chain in ["core", "block", "inline"] @@ -227,14 +227,23 @@ def reset_rules(self) -> Generator[None, None, None]: self.inline.ruler2.enableOnly(chain_rules["inline2"]) def add_render_rule( - self, name: str, function: Callable[..., Any], fmt: str = "html" + self, + name: str, + function: Callable[ + [RendererProtocol, Sequence[Token], int, OptionsDict, EnvType], str + ], + fmt: str = "html", ) -> None: """Add a rule for rendering a particular Token type. 
Only applied when ``renderer.__output__ == fmt`` + + :param name: the name of the token type + :param function: the function to call to render the token; + it should have the signature ``function(renderer, tokens, idx, options, env)`` """ if self.renderer.__output__ == fmt: - self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore + self.renderer.rules[name] = function.__get__(self.renderer) def use( self, plugin: Callable[..., None], *params: Any, **options: Any diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 72360f9b..de9b3b2b 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -57,7 +57,7 @@ def __init__(self) -> None: def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None: """Generate tokens for input range.""" - rules = self.ruler.getRules("") + rules = self.ruler.getRules() line = startLine maxNesting = state.md.options.maxNesting hasEmptyLines = False diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index ca5ab256..82b628b7 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -41,5 +41,5 @@ def __init__(self) -> None: def process(self, state: StateCore) -> None: """Executes core chain rules.""" - for rule in self.ruler.getRules(""): + for rule in self.ruler.getRules(): rule(state) diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 0026c383..d0c587a4 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -67,7 +67,7 @@ def skipToken(self, state: StateInline) -> None: """ ok = False pos = state.pos - rules = self.ruler.getRules("") + rules = self.ruler.getRules() maxNesting = state.md.options["maxNesting"] cache = state.cache @@ -106,7 +106,7 @@ def skipToken(self, state: StateInline) -> None: def tokenize(self, state: StateInline) -> None: """Generate tokens for input range.""" ok = False - rules = self.ruler.getRules("") + rules = self.ruler.getRules() end = state.posMax 
maxNesting = state.md.options["maxNesting"] @@ -141,7 +141,7 @@ def parse( """Process input string and push inline tokens into `tokens`""" state = StateInline(src, md, env, tokens) self.tokenize(state) - rules2 = self.ruler2.getRules("") + rules2 = self.ruler2.getRules() for rule in rules2: rule(state) return state.tokens diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 7fee9ffa..1438c4d0 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -9,21 +9,31 @@ class Renderer from collections.abc import Sequence import inspect -from typing import Any, ClassVar, Protocol +from typing import TYPE_CHECKING, Any, Callable, ClassVar, MutableMapping, Protocol from .common.utils import escapeHtml, unescapeAll from .token import Token from .utils import EnvType, OptionsDict +if TYPE_CHECKING: + from markdown_it import MarkdownIt + class RendererProtocol(Protocol): __output__: ClassVar[str] + rules: MutableMapping[ + str, + Callable[[Sequence[Token], int, OptionsDict, EnvType], str], + ] def render( self, tokens: Sequence[Token], options: OptionsDict, env: EnvType ) -> Any: ... + # note container and admon plugins also expect renderToken to be defined, + # but it is unclear if this should be a requirement for all renderers + class RendererHTML(RendererProtocol): """Contains render rules for tokens. Can be updated and extended. 
@@ -57,7 +67,7 @@ def strong_close(self, tokens, idx, options, env): __output__ = "html" - def __init__(self, parser: Any = None): + def __init__(self, parser: None | MarkdownIt = None): self.rules = { k: v for k, v in inspect.getmembers(self, predicate=inspect.ismethod) diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index bd8baba3..a38a4037 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -59,6 +59,7 @@ def srcCharCode(self) -> tuple[int, ...]: class RuleOptionsType(TypedDict, total=False): alt: list[str] + """list of rules which can be terminated by this one.""" RuleFuncTv = TypeVar("RuleFuncTv") @@ -71,9 +72,12 @@ class Rule(Generic[RuleFuncTv]): enabled: bool fn: RuleFuncTv = field(repr=False) alt: list[str] + """list of rules which can be terminated by this one.""" class Ruler(Generic[RuleFuncTv]): + """Class to manage functions (rules) which identify syntax elements.""" + def __init__(self) -> None: # List of added rules. self.__rules__: list[Rule[RuleFuncTv]] = [] @@ -255,10 +259,13 @@ def disable( def getRules(self, chainName: str = "") -> list[RuleFuncTv]: """Return array of active functions (rules) for given chain name. + It analyzes rules configuration, compiles caches if not exists and returns result. - Default chain name is `''` (empty string). It can't be skipped. - That's done intentionally, to keep signature monomorphic for high speed. + :param chainName: name of chain to return rules for: + - The default `""` means all "top-level" rules for this ruler. + - A specific name can be used to fetch only rules which can terminate + the named rule (used for block level rules like paragraph, list, etc.) 
""" if self.__cache__ is None: diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 445ad265..ffcdf386 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -28,40 +28,68 @@ def __init__( self.tokens = tokens - self.bMarks: list[int] = [] # line begin offsets for fast jumps - self.eMarks: list[int] = [] # line end offsets for fast jumps - # offsets of the first non-space characters (tabs not expanded) + self.bMarks: list[int] = [] + """line begin offsets for fast jumps""" + + self.eMarks: list[int] = [] + """line end offsets for fast jumps""" + self.tShift: list[int] = [] - self.sCount: list[int] = [] # indents for each line (tabs expanded) + """Offsets of the first non-space characters (tabs not expanded)""" + + self.sCount: list[int] = [] + """indents for each line (tabs expanded)""" - # An amount of virtual spaces (tabs expanded) between beginning - # of each line (bMarks) and real beginning of that line. - # - # It exists only as a hack because blockquotes override bMarks - # losing information in the process. - # - # It's used only when expanding tabs, you can think about it as - # an initial tab length, e.g. bsCount=21 applied to string `\t123` - # means first tab should be expanded to 4-21%4 === 3 spaces. - # self.bsCount: list[int] = [] + """ + An amount of virtual spaces (tabs expanded) between beginning + of each line (bMarks) and real beginning of that line. + + It exists only as a hack because blockquotes override bMarks + losing information in the process. + + It's used only when expanding tabs, you can think about it as + an initial tab length, e.g. `bsCount=21` applied to string `\\t123` + means first tab should be expanded to `4-21 % 4 == 3` spaces. 
+ """ + # # block parser variables - self.blkIndent = 0 # required block content indent (for example, if we are - # inside a list, it would be positioned after list marker) - self.line = 0 # line index in src - self.lineMax = 0 # lines count - self.tight = False # loose/tight mode for lists - self.ddIndent = -1 # indent of the current dd block (-1 if there isn't any) - self.listIndent = -1 # indent of the current list block (-1 if there isn't any) - - # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference' - # used in lists to determine if they interrupt a paragraph + # + + self.blkIndent = 0 + """required block content indent + (for example, if we are inside a list, it would be positioned after list marker) + """ + + self.line = 0 + """line index in src""" + self.lineMax = 0 + """Total lines count""" + + self.tight = False + """loose/tight mode for lists""" + + self.ddIndent = -1 + """indent of the current dd block (-1 if there isn't any), + used only by deflist plugin + """ + + self.listIndent = -1 + """indent of the current list block (-1 if there isn't any)""" + self.parentType = "root" + """ + can be 'blockquote', 'list', 'root', 'paragraph' or 'reference' + used in lists to determine if they interrupt a paragraph + """ self.level = 0 + """Current nesting level of tokens, + +1 when adding opening token, -1 when adding closing token + """ - # renderer + # renderer (does not appear to be used) self.result = "" # Create caches diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index c0c491c4..0eb75ff2 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -51,28 +51,42 @@ def __init__( self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens) self.pos = 0 + """Current position in src string""" self.posMax = len(self.src) + """Length of the src string""" self.level = 0 + """Current nesting level of tokens, + +1 when adding opening token, -1 when 
adding closing token + """ self.pending = "" + """Accumulated text not yet converted to a token. + This will be added as a `text` token when the next token is pushed (before it), + or when the parser finishes running (after all other tokens). + """ self.pendingLevel = 0 + """The nesting level of the pending text""" - # Stores { start: end } pairs. Useful for backtrack - # optimization of pairs parse (emphasis, strikes). self.cache: dict[int, int] = {} + """ + Stores { start: end } pairs. + Useful for backtrack optimization of pairs parse (emphasis, strikes). + """ - # List of emphasis-like delimiters for current tag self.delimiters: list[Delimiter] = [] + """List of emphasis-like delimiters for current tag""" - # Stack of delimiter lists for upper level tags self._prev_delimiters: list[list[Delimiter]] = [] + """Stack of delimiter lists for upper level tags""" - # backticklength => last seen position self.backticks: dict[int, int] = {} + """backticklength => last seen position""" self.backticksScanned = False - # Counter used to disable inline linkify-it execution - # inside <a> and markdown links self.linkLevel = 0 + """ + Counter used to disable inline linkify-it execution + inside `<a>` and markdown links + """ def __repr__(self) -> str: return ( diff --git a/pyproject.toml b/pyproject.toml index ea7cd036..633eef2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ compare = [ linkify = ["linkify-it-py>=1,<3"] plugins = ["mdit-py-plugins"] rtd = [ - "mdit-py-plugins @ git+https://github.com/executablebooks/mdit-py-plugins@master", + "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx",