From e14202317dc13dd5eed93b5d7cfd183c87de893f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Fri, 24 May 2024 20:20:15 +0200 Subject: [PATCH] refactor: Use a custom autoref HTML tag PR-48: https://github.com/mkdocstrings/autorefs/pull/48 --- src/mkdocs_autorefs/plugin.py | 3 +- src/mkdocs_autorefs/references.py | 97 +++++++++++++++++++++++++++++-- tests/test_references.py | 57 ++++++++++++++++-- 3 files changed, 145 insertions(+), 12 deletions(-) diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 6bfe608..52e60fa 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -52,6 +52,7 @@ class AutorefsPlugin(BasePlugin): scan_toc: bool = True current_page: str | None = None + legacy_refs: bool = True def __init__(self) -> None: """Initialize the object.""" @@ -211,7 +212,7 @@ def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa: log.debug(f"Fixing references in page {page.file.src_path}") url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor) - fixed_output, unmapped = fix_refs(output, url_mapper) + fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs) if unmapped and log.isEnabledFor(logging.WARNING): for ref in unmapped: diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index fc39810..3101a41 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -6,6 +6,7 @@ import re import warnings from html import escape, unescape +from html.parser import HTMLParser from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match from urllib.parse import urlsplit from xml.etree.ElementTree import Element @@ -44,7 +45,12 @@ def __getattr__(name: str) -> Any: rf"(?: class=(?P<class>{_ATTR_VALUE}))?(?P<attrs> [^<>]+)?>(?P<title>.*?)</span>", flags=re.DOTALL, ) -"""A regular expression to match mkdocs-autorefs' special reference markers 
+"""Deprecated. Use [`AUTOREF_RE`][mkdocs_autorefs.references.AUTOREF_RE] instead.""" + +AUTOREF_RE = re.compile(r"<autoref (?P<attrs>.*?)>(?P<title>.*?)</autoref>", flags=re.DOTALL) +"""The autoref HTML tag regular expression. + +A regular expression to match mkdocs-autorefs' special reference markers in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page]. """ @@ -135,8 +141,8 @@ def _make_tag(self, identifier: str, text: str) -> Element: Returns: A new element. """ - el = Element("span") - el.set("data-autorefs-identifier", identifier) + el = Element("autoref") + el.set("identifier", identifier) el.text = text return el @@ -167,7 +173,7 @@ def relative_url(url_a: str, url_b: str) -> str: return f"{relative}#{anchor}" -def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable: +def _legacy_fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable: """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub). In our context, we match Markdown references and replace them with HTML links. 
@@ -216,7 +222,84 @@ def inner(match: Match) -> str: return inner -def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str]]: +class _AutorefsAttrs(dict): + _handled_attrs: ClassVar[set[str]] = {"identifier", "optional", "hover", "class"} + + @property + def remaining(self) -> str: + return " ".join(k if v is None else f'{k}="{v}"' for k, v in self.items() if k not in self._handled_attrs) + + +class _HTMLAttrsParser(HTMLParser): + def __init__(self): + super().__init__() + self.attrs = {} + + def parse(self, html: str) -> _AutorefsAttrs: + self.attrs.clear() + self.feed(html) + return _AutorefsAttrs(self.attrs) + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: # noqa: ARG002 + self.attrs.update(attrs) + + +_html_attrs_parser = _HTMLAttrsParser() + + +def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable: + """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub). + + In our context, we match Markdown references and replace them with HTML links. + + When the matched reference's identifier was not mapped to an URL, we append the identifier to the outer + `unmapped` list. It generally means the user is trying to cross-reference an object that was not collected + and rendered, making it impossible to link to it. We catch this exception in the caller to issue a warning. + + Arguments: + url_mapper: A callable that gets an object's site URL by its identifier, + such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][]. + unmapped: A list to store unmapped identifiers. + + Returns: + The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects) + and returning the replacement strings. 
+ """ + + def inner(match: Match) -> str: + title = match["title"] + attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>") + identifier: str = attrs["identifier"] + optional = "optional" in attrs + hover = "hover" in attrs + + try: + url = url_mapper(unescape(identifier)) + except KeyError: + if optional: + if hover: + return f'<span title="{identifier}">{title}</span>' + return title + unmapped.append(identifier) + if title == identifier: + return f"[{identifier}][]" + return f"[{title}][{identifier}]" + + parsed = urlsplit(url) + external = parsed.scheme or parsed.netloc + classes = (attrs.get("class") or "").strip().split() + classes = ["autorefs", "autorefs-external" if external else "autorefs-internal", *classes] + class_attr = " ".join(classes) + if remaining := attrs.remaining: + remaining = f" {remaining}" + if optional and hover: + return f'<a class="{class_attr}" title="{identifier}" href="{escape(url)}"{remaining}>{title}</a>' + return f'<a class="{class_attr}" href="{escape(url)}"{remaining}>{title}</a>' + + return inner + + +def fix_refs(html: str, url_mapper: Callable[[str], str], *, _legacy_refs: bool = True) -> tuple[str, list[str]]: """Fix all references in the given HTML text. Arguments: @@ -228,7 +311,9 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str The fixed HTML. 
""" unmapped: list[str] = [] - html = AUTO_REF_RE.sub(fix_ref(url_mapper, unmapped), html) + html = AUTOREF_RE.sub(fix_ref(url_mapper, unmapped), html) + if _legacy_refs: + html = AUTO_REF_RE.sub(_legacy_fix_ref(url_mapper, unmapped), html) return html, unmapped diff --git a/tests/test_references.py b/tests/test_references.py index f687afb..748eacf 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -212,7 +212,7 @@ def test_ignore_reference_with_special_char() -> None: ) -def test_custom_required_reference() -> None: +def test_legacy_custom_required_reference() -> None: """Check that external HTML-based references are expanded or reported missing.""" url_map = {"ok": "ok.html#ok"} source = "<span data-autorefs-identifier=bar>foo</span> <span data-autorefs-identifier=ok>ok</span>" @@ -221,7 +221,16 @@ def test_custom_required_reference() -> None: assert unmapped == ["bar"] -def test_custom_optional_reference() -> None: +def test_custom_required_reference() -> None: + """Check that external HTML-based references are expanded or reported missing.""" + url_map = {"ok": "ok.html#ok"} + source = "<autoref identifier=bar>foo</autoref> <autoref identifier=ok>ok</autoref>" + output, unmapped = fix_refs(source, url_map.__getitem__) + assert output == '[foo][bar] <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>' + assert unmapped == ["bar"] + + +def test_legacy_custom_optional_reference() -> None: """Check that optional HTML-based references are expanded and never reported missing.""" url_map = {"ok": "ok.html#ok"} source = '<span data-autorefs-optional="bar">foo</span> <span data-autorefs-optional=ok>ok</span>' @@ -230,7 +239,16 @@ def test_custom_optional_reference() -> None: assert unmapped == [] -def test_custom_optional_hover_reference() -> None: +def test_custom_optional_reference() -> None: + """Check that optional HTML-based references are expanded and never reported missing.""" + url_map = {"ok": "ok.html#ok"} + source = '<autoref 
optional identifier="bar">foo</autoref> <autoref identifier=ok optional>ok</autoref>' + output, unmapped = fix_refs(source, url_map.__getitem__) + assert output == 'foo <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>' + assert unmapped == [] + + +def test_legacy_custom_optional_hover_reference() -> None: """Check that optional-hover HTML-based references are expanded and never reported missing.""" url_map = {"ok": "ok.html#ok"} source = '<span data-autorefs-optional-hover="bar">foo</span> <span data-autorefs-optional-hover=ok>ok</span>' @@ -242,7 +260,19 @@ def test_custom_optional_hover_reference() -> None: assert unmapped == [] -def test_external_references() -> None: +def test_custom_optional_hover_reference() -> None: + """Check that optional-hover HTML-based references are expanded and never reported missing.""" + url_map = {"ok": "ok.html#ok"} + source = '<autoref optional hover identifier="bar">foo</autoref> <autoref optional identifier=ok hover>ok</autoref>' + output, unmapped = fix_refs(source, url_map.__getitem__) + assert ( + output + == '<span title="bar">foo</span> <a class="autorefs autorefs-internal" title="ok" href="ok.html#ok">ok</a>' + ) + assert unmapped == [] + + +def test_legacy_external_references() -> None: """Check that external references are marked as such.""" url_map = {"example": "https://example.com"} source = '<span data-autorefs-optional="example">example</span>' @@ -251,6 +281,15 @@ def test_external_references() -> None: assert unmapped == [] +def test_external_references() -> None: + """Check that external references are marked as such.""" + url_map = {"example": "https://example.com"} + source = '<autoref optional identifier="example">example</autoref>' + output, unmapped = fix_refs(source, url_map.__getitem__) + assert output == '<a class="autorefs autorefs-external" href="https://example.com">example</a>' + assert unmapped == [] + + def test_register_markdown_anchors() -> None: """Check that Markdown anchors are 
registered when enabled.""" plugin = AutorefsPlugin() @@ -333,9 +372,17 @@ def test_register_markdown_anchors_with_admonition() -> None: } -def test_keep_data_attributes() -> None: +def test_legacy_keep_data_attributes() -> None: """Keep HTML data attributes from autorefs spans.""" url_map = {"example": "https://e.com"} source = '<span data-autorefs-optional="example" class="hi ho" data-foo data-bar="0">e</span>' output, _ = fix_refs(source, url_map.__getitem__) assert output == '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>' + + +def test_keep_data_attributes() -> None: + """Keep HTML data attributes from autorefs spans.""" + url_map = {"example": "https://e.com"} + source = '<autoref optional identifier="example" class="hi ho" data-foo data-bar="0">e</autoref>' + output, _ = fix_refs(source, url_map.__getitem__) + assert output == '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'