From 9122b529a5caa636fd4532a5e6148777b6ac664d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sun, 8 Mar 2020 19:34:42 +0000 Subject: [PATCH] Update to `mistletoe-ebp==0.9.4a2` dependency (#110) This update allows us to drop most of the span token patching code, and we now use the `Math` token directly from mistletoe. It also improves how token sets are instantiated within the renderers, making it a lot easier to swap in/out tokens to be included in the parse. --- .vscode/settings.json | 3 +- docs/api/tokens.rst | 10 - docs/conf.py | 1 + myst_parser/__init__.py | 2 +- myst_parser/block_tokens.py | 49 ++--- myst_parser/docutils_renderer.py | 62 ++++-- myst_parser/html_renderer.py | 75 ++++--- myst_parser/json_renderer.py | 49 +++-- myst_parser/span_tokens.py | 191 ++++-------------- setup.py | 2 +- tests/test_syntax/test_ast.py | 4 +- .../test_front_matter_basic_strings0_.yml | 1 + 12 files changed, 179 insertions(+), 270 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index db5a5166..9b88e43c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -15,5 +15,6 @@ "python.linting.pylintEnabled": false, "python.linting.flake8Enabled": true, "python.linting.enabled": true, - "autoDocstring.customTemplatePath": "docstring.fmt.mustache" + "autoDocstring.customTemplatePath": "docstring.fmt.mustache", + "python.pythonPath": "/anaconda/envs/ebp/bin/python" } \ No newline at end of file diff --git a/docs/api/tokens.rst b/docs/api/tokens.rst index a1a4ee1e..926bec26 100644 --- a/docs/api/tokens.rst +++ b/docs/api/tokens.rst @@ -43,16 +43,6 @@ Role :exclude-members: __init__ -Math -.... - -.. autoclass:: myst_parser.span_tokens.Math - :members: - :no-undoc-members: - :show-inheritance: - :exclude-members: __init__ - - Target ...... diff --git a/docs/conf.py b/docs/conf.py index 4b76a6f1..49f5563d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ def run_apidoc(app): autodoc_member_order = "bysource" nitpick_ignore = [ + ("py:class", "Any"), ("py:class", "Tuple"), ("py:class", "ForwardRef"), ("py:class", "NoneType"), diff --git a/myst_parser/__init__.py b/myst_parser/__init__.py index 93a19fdc..311b240f 100644 --- a/myst_parser/__init__.py +++ b/myst_parser/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.4.1" +__version__ = "0.5.0a1" def text_to_tokens(text: str): diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py index 13471da7..8a266982 100644 --- a/myst_parser/block_tokens.py +++ b/myst_parser/block_tokens.py @@ -4,38 +4,9 @@ import attr from mistletoe import block_tokens -from mistletoe.block_tokens import ( # noqa: F401 - FrontMatter, - HTMLBlock, - Heading, - LinkDefinition, - ThematicBreak, - Table, - TableRow, - BlockCode, - CodeFence, -) +from mistletoe.block_tokens import Heading, ThematicBreak, CodeFence from mistletoe.attr_doc import autodoc -""" -Tokens to be included in the parsing process, in the order specified. -""" -__all__ = [ - "HTMLBlock", - "LineComment", - "BlockCode", - "Heading", - "Quote", - "CodeFence", - "ThematicBreak", - "BlockBreak", - "List", - "Table", - "LinkDefinition", - "Paragraph", - "FrontMatter", -] - @autodoc @attr.s(slots=True, kw_only=True) @@ -69,10 +40,10 @@ def read( # TODO this is a placeholder for implementing span level range storage # (with start/end character attributes) for result in doc.walk(): - if not hasattr(result.node, "position"): + if getattr(result.node, "position", None) is None: try: result.node.position = result.parent.position - except AttributeError: + except (AttributeError, TypeError): raise return doc @@ -157,7 +128,10 @@ def read(cls, lines): @autodoc @attr.s(slots=True, kw_only=True) class Quote(block_tokens.Quote): - """Quote token. (`["> # heading\\n", "> paragraph\\n"]`).""" + """Quote token. (`["> # heading\\n", "> paragraph\\n"]`). + + MyST variant, that includes transitions to `LineComment` and `BlockBreak`. + """ @classmethod def transition(cls, next_line): @@ -179,6 +153,8 @@ class Paragraph(block_tokens.Paragraph): """Paragraph token. (`["some\\n", "continuous\\n", "lines\\n"]`) Boundary between span-level and block-level tokens. + + MyST variant, that includes transitions to `LineComment` and `BlockBreak`. """ @classmethod @@ -197,7 +173,10 @@ def transition(cls, next_line): @autodoc @attr.s(slots=True, kw_only=True) class List(block_tokens.List): - """List token (unordered or ordered)""" + """List token (unordered or ordered) + + MyST variant, that includes transitions to `LineComment` and `BlockBreak`. + """ @classmethod def read(cls, lines): @@ -244,6 +223,8 @@ class ListItem(block_tokens.ListItem): """List items. Not included in the parsing process, but called by List. + + MyST variant, that includes transitions to `LineComment` and `BlockBreak`. """ @staticmethod diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py index 56c124c9..bf2ab97e 100644 --- a/myst_parser/docutils_renderer.py +++ b/myst_parser/docutils_renderer.py @@ -1,6 +1,5 @@ from contextlib import contextmanager import copy -from itertools import chain from os.path import splitext from pathlib import Path import re @@ -19,12 +18,11 @@ from docutils.utils import new_document, Reporter import yaml -from mistletoe import span_tokens +from mistletoe import block_tokens, block_tokens_ext, span_tokens, span_tokens_ext from mistletoe.renderers.base import BaseRenderer -from myst_parser import span_tokens as myst_span_tokens from myst_parser import block_tokens as myst_block_tokens -from mistletoe.parse_context import ParseContext, set_parse_context, tokens_from_module +from myst_parser import span_tokens as myst_span_tokens from myst_parser.parse_directives import parse_directive_text, DirectiveParsingError from myst_parser.utils import escape_url @@ -35,11 +33,43 @@ class DocutilsRenderer(BaseRenderer): Note this renderer has no dependencies on Sphinx. """ + default_block_tokens = ( + block_tokens.HTMLBlock, + myst_block_tokens.LineComment, + block_tokens.BlockCode, + block_tokens.Heading, + myst_block_tokens.Quote, + block_tokens.CodeFence, + block_tokens.ThematicBreak, + myst_block_tokens.BlockBreak, + myst_block_tokens.List, + block_tokens_ext.Table, + block_tokens.LinkDefinition, + myst_block_tokens.Paragraph, + ) + + default_span_tokens = ( + span_tokens.EscapeSequence, + myst_span_tokens.Role, + span_tokens.HTMLSpan, + span_tokens.AutoLink, + myst_span_tokens.Target, + span_tokens.CoreTokens, + span_tokens_ext.Math, + # TODO there is no matching core element in docutils for strikethrough + # span_tokens_ext.Strikethrough, + span_tokens.InlineCode, + span_tokens.LineBreak, + span_tokens.RawText, + ) + def __init__( self, document: Optional[nodes.document] = None, current_node: Optional[nodes.Element] = None, config: Optional[dict] = None, + find_blocks=None, + find_spans=None, ): """Initialise the renderer. @@ -47,7 +77,8 @@ def __init__( :param current_node: The root node from which to begin populating (default is document, or should be an ancestor of document) :param config: contains configuration specific to the rendering process - + :param find_blocks: override the default block tokens (classes or class paths) + :param find_spans: override the default span tokens (classes or class paths) """ self.config = config or {} self.document = document or self.new_document() # type: nodes.document @@ -57,20 +88,7 @@ def __init__( get_language(self.language_module) self._level_to_elem = {0: self.document} - super().__init__() - - _myst_span_tokens = tokens_from_module(myst_span_tokens) - _myst_block_tokens = tokens_from_module(myst_block_tokens) - - for token in chain(_myst_span_tokens, _myst_block_tokens): - render_func = getattr(self, self._cls_to_func(token.__name__)) - self.render_map[token.__name__] = render_func - - parse_context = ParseContext( - block_tokens=_myst_block_tokens, span_tokens=_myst_span_tokens - ) - set_parse_context(parse_context) - self.parse_context = parse_context.copy() + super().__init__(find_blocks=find_blocks, find_spans=find_spans) def new_document(self, source_path="notset") -> nodes.document: settings = OptionParser(components=(RSTParser,)).get_default_values() @@ -80,7 +98,7 @@ def add_line_and_source_path(self, node, token): """Copy the line number and document source path to the docutils node.""" try: node.line = token.position[0] + 1 - except AttributeError: + except (AttributeError, TypeError): pass node.source = self.document["source"] @@ -356,7 +374,7 @@ def render_image(self, token): img_node["uri"] = token.src img_node["alt"] = "" - if token.children and isinstance(token.children[0], myst_span_tokens.RawText): + if token.children and isinstance(token.children[0], span_tokens.RawText): img_node["alt"] = token.children[0].content token.children[0].content = "" @@ -451,7 +469,7 @@ def render_role(self, token): # TODO role name white/black lists try: lineno = token.position[0] - except AttributeError: + except (AttributeError, TypeError): lineno = 0 inliner = MockInliner(self, lineno) role_func, messages = roles.role( diff --git a/myst_parser/html_renderer.py b/myst_parser/html_renderer.py index 0c2849a7..02fd83d5 100644 --- a/myst_parser/html_renderer.py +++ b/myst_parser/html_renderer.py @@ -1,52 +1,67 @@ -import html -from itertools import chain -import re from textwrap import dedent -from mistletoe.parse_context import ParseContext, set_parse_context, tokens_from_module +from mistletoe import block_tokens, block_tokens_ext, span_tokens, span_tokens_ext from mistletoe.renderers import html as html_renderer -from myst_parser import span_tokens -from myst_parser import block_tokens +from myst_parser.block_tokens import LineComment, BlockBreak, Quote, Paragraph, List +from myst_parser.span_tokens import Role, Target class HTMLRenderer(html_renderer.HTMLRenderer): - """This HTML render uses the same block/span tokens as the docutils renderer. + """This HTML render uses the uses the MyST spec block and span tokens. It is used to test compliance with the commonmark spec, and can be used for basic previews, but does not run roles/directives, resolve cross-references etc... """ - def __init__(self, add_mathjax=False, as_standalone=False, add_css=None): + default_block_tokens = ( + block_tokens.HTMLBlock, + LineComment, + block_tokens.BlockCode, + block_tokens.Heading, + Quote, + block_tokens.CodeFence, + block_tokens.ThematicBreak, + BlockBreak, + List, + block_tokens_ext.Table, + block_tokens.LinkDefinition, + Paragraph, + ) + + default_span_tokens = ( + span_tokens.EscapeSequence, + Role, + span_tokens.HTMLSpan, + span_tokens.AutoLink, + Target, + span_tokens.CoreTokens, + span_tokens_ext.Math, + # TODO there is no matching core element in docutils for strikethrough + # span_tokens_ext.Strikethrough, + span_tokens.InlineCode, + span_tokens.LineBreak, + span_tokens.RawText, + ) + + def __init__( + self, + find_blocks=None, + find_spans=None, + add_mathjax=False, + as_standalone=False, + add_css=None, + ): """Intitalise HTML renderer + :param find_blocks: override the default block tokens (classes or class paths) + :param find_spans: override the default span tokens (classes or class paths) :param add_mathjax: add the mathjax CDN :param as_standalone: return the HTML body within a minmal HTML page :param add_css: if as_standalone=True, CSS to add to the header """ - self._suppress_ptag_stack = [False] - - super(html_renderer.HTMLRenderer, self).__init__() - - myst_span_tokens = tokens_from_module(span_tokens) - myst_block_tokens = tokens_from_module(block_tokens) - - for token in chain(myst_span_tokens, myst_block_tokens): - render_func = getattr(self, self._cls_to_func(token.__name__)) - self.render_map[token.__name__] = render_func - - parse_context = ParseContext(myst_block_tokens, myst_span_tokens) - set_parse_context(parse_context) - self.parse_context = parse_context.copy() - - # html.entities.html5 includes entitydefs not ending with ';', - # CommonMark seems to hate them, so... - self._stdlib_charref = html._charref - _charref = re.compile( - r"&(#[0-9]+;" r"|#[xX][0-9a-fA-F]+;" r"|[^\t\n\f <&#;]{1,32};)" - ) - html._charref = _charref + super().__init__(find_blocks=find_blocks, find_spans=find_spans) self.mathjax_src = "" if add_mathjax: diff --git a/myst_parser/json_renderer.py b/myst_parser/json_renderer.py index b07b8d90..02146912 100644 --- a/myst_parser/json_renderer.py +++ b/myst_parser/json_renderer.py @@ -1,24 +1,41 @@ """JSON renderer for myst.""" -from itertools import chain - -from mistletoe.parse_context import ParseContext, set_parse_context, tokens_from_module +from mistletoe import block_tokens, block_tokens_ext, span_tokens, span_tokens_ext from mistletoe.renderers import json -from myst_parser import span_tokens -from myst_parser import block_tokens +from myst_parser.block_tokens import LineComment, BlockBreak, Quote, Paragraph, List +from myst_parser.span_tokens import Role, Target class JsonRenderer(json.JsonRenderer): - def __init__(self): - """This AST render uses the same block/span tokens as the docutils renderer.""" - super().__init__() - myst_span_tokens = tokens_from_module(span_tokens) - myst_block_tokens = tokens_from_module(block_tokens) + """This JSON render uses the MyST spec block and span tokens. + """ - for token in chain(myst_span_tokens, myst_block_tokens): - render_func = getattr(self, self._cls_to_func(token.__name__)) - self.render_map[token.__name__] = render_func + default_block_tokens = ( + block_tokens.HTMLBlock, + LineComment, + block_tokens.BlockCode, + block_tokens.Heading, + Quote, + block_tokens.CodeFence, + block_tokens.ThematicBreak, + BlockBreak, + List, + block_tokens_ext.Table, + block_tokens.LinkDefinition, + Paragraph, + ) - parse_context = ParseContext(myst_block_tokens, myst_span_tokens) - set_parse_context(parse_context) - self.parse_context = parse_context.copy() + default_span_tokens = ( + span_tokens.EscapeSequence, + Role, + span_tokens.HTMLSpan, + span_tokens.AutoLink, + Target, + span_tokens.CoreTokens, + span_tokens_ext.Math, + # TODO there is no matching core element in docutils for strikethrough + # span_tokens_ext.Strikethrough, + span_tokens.InlineCode, + span_tokens.LineBreak, + span_tokens.RawText, + ) diff --git a/myst_parser/span_tokens.py b/myst_parser/span_tokens.py index 66de0a7e..abeb5721 100644 --- a/myst_parser/span_tokens.py +++ b/myst_parser/span_tokens.py @@ -1,60 +1,16 @@ import re -from threading import local +from typing import Pattern, Tuple -from mistletoe import span_tokens, nested_tokenizer -from mistletoe.span_tokens import ( # noqa F401 - HTMLSpan, - Emphasis, - EscapeSequence, - AutoLink, - Image, - LineBreak, - Link, - RawText, - Strong, -) +import attr -""" -Tokens to be included in the parsing process, in the order specified. -RawText is last as a 'fallback' token -""" -__all__ = ( - "Role", - "HTMLSpan", - "EscapeSequence", - "AutoLink", - "Target", - "CoreTokens", - "Math", - "InlineCode", - "LineBreak", - "RawText", -) -# Note Strikethrough is left out from the core mistletoe tokens, -# since there is no matching element in docutils +from mistletoe import span_tokens +from mistletoe.attr_doc import autodoc - -_core_matches = local() -_core_matches.value = {} - - -class CoreTokens(span_tokens.SpanToken): - precedence = 3 - - def __new__(self, match): - return globals()[match.type](match) - - @classmethod - def find(cls, string): - return find_core_tokens(string) - - -class InlineCode(span_tokens.InlineCode): - @classmethod - def find(cls, string): - return _core_matches.value.pop("InlineCode", []) +__all__ = ("Role", "Target") +@autodoc +@attr.s(kw_only=True, slots=True) class Role(span_tokens.SpanToken): """ Inline role tokens. ("{name}`some code`") @@ -66,115 +22,44 @@ class Role(span_tokens.SpanToken): ) parse_inner = False - def __init__(self, match): - self.role_name = match.group(1) - content = match.group(3) - self.children = ( - span_tokens.RawText(" ".join(re.split("[ \n]+", content.strip()))), - ) - - -class Math(span_tokens.SpanToken): - - pattern = re.compile(r"(?=3.6", - install_requires=["mistletoe-ebp~=0.9.3"], + install_requires=["mistletoe-ebp==0.9.4a2"], extras_require={ "sphinx": ["pyyaml", "docutils>=0.15", "sphinx>=2,<3"], "code_style": ["flake8<3.8.0,>=3.7.0", "black", "pre-commit==1.17.0"], diff --git a/tests/test_syntax/test_ast.py b/tests/test_syntax/test_ast.py index 05acd326..3c5355a4 100644 --- a/tests/test_syntax/test_ast.py +++ b/tests/test_syntax/test_ast.py @@ -45,9 +45,9 @@ def test_walk(json_renderer): ("RawText()", "Paragraph(children=2, position=(1, 1))", 2), ("Strong(children=1)", "Paragraph(children=2, position=(1, 1))", 2), ("RawText()", "Paragraph(children=2, position=(3, 3))", 2), - ("Link(children=1)", "Paragraph(children=2, position=(3, 3))", 2), + ("Link(target='link', title='')", "Paragraph(children=2, position=(3, 3))", 2), ("RawText()", "Strong(children=1)", 3), - ("Emphasis(children=1)", "Link(children=1)", 3), + ("Emphasis(children=1)", "Link(target='link', title='')", 3), ("RawText()", "Emphasis(children=1)", 4), ] diff --git a/tests/test_syntax/test_ast/test_front_matter_basic_strings0_.yml b/tests/test_syntax/test_ast/test_front_matter_basic_strings0_.yml index 70543bc3..7fd01dbe 100644 --- a/tests/test_syntax/test_ast/test_front_matter_basic_strings0_.yml +++ b/tests/test_syntax/test_ast/test_front_matter_basic_strings0_.yml @@ -6,5 +6,6 @@ front_matter: position: - 0 - 3 + type: FrontMatter link_definitions: {} type: Document