From 2916eb27dec89287dcaa1aefb4e9532156b66e30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Sun, 1 Sep 2024 15:14:32 +0200 Subject: [PATCH] feat: Add option to resolve autorefs to closest URLs when multiple ones are found Issue-52: https://github.com/mkdocstrings/autorefs/issues/52 --- README.md | 44 +++++++++++++++++++- src/mkdocs_autorefs/plugin.py | 76 ++++++++++++++++++++++++++++++++++- tests/test_plugin.py | 24 +++++++++++ 3 files changed, 141 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b754668..b424123 100644 --- a/README.md +++ b/README.md @@ -47,9 +47,38 @@ We can [link to that heading][hello-world] from another page too. This works the same as [a normal link to that heading](../doc1.md#hello-world). ``` -Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently. And the issue is somewhat exacerbated by the fact that [MkDocs supports only *relative* links between pages](https://github.com/mkdocs/mkdocs/issues/1592). +Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently. -Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. In such cases, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings. +### Non-unique headings + +When linking to a heading that appears several times throughout the site, this plugin will log a warning message stating that multiple URLs were found and that headings should be made unique, and will resolve the link using the first found URL. 
+ +To avoid these warnings, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings, and use these aliases when referencing the headings. + +If you cannot use Markdown anchors, for example because you inject the same generated content in multiple locations (for example mkdocstrings' API documentation), then you can try to alleviate the warnings by enabling the `resolve_closest` option: + +```yaml +plugins: +- autorefs: + resolve_closest: true +``` + +When `resolve_closest` is enabled, and multiple URLs are found for the same identifier, the plugin will try to resolve to the one that is "closest" to the current page (the page containing the link). By closest, we mean: + +- URLs that are relative to the current page's URL or, if there are none, to the URL of its nearest parent (climbing up one level at a time) +- if multiple URLs are relative to it, the one at the shortest distance (the shallowest one). + +If multiple relative URLs are at the same distance, the first of these URLs will be used. If no URL is relative to the current page's URL or to any of its parents, a warning is logged and the first of all found URLs is used. + +Examples: + +Current page | Candidate URLs | Relative URLs | Winner +------------ | -------------- | ------------- | ------ +` ` | `x/#b`, `#b` | `#b` | `#b` (only one relative) +`a/` | `b/c/#d`, `c/#d` | none | `b/c/#d` (no relative, use first one, even if longer distance) +`a/b/` | `x/#e`, `a/c/#e`, `a/d/#e` | `a/c/#e`, `a/d/#e` (relative to parent `a/`) | `a/c/#e` (same distance, use first one) +`a/b/` | `x/#e`, `a/c/d/#e`, `a/c/#e` | `a/c/d/#e`, `a/c/#e` (relative to parent `a/`) | `a/c/#e` (shortest distance) +`a/b/c/` | `x/#e`, `a/#e`, `a/b/#e`, `a/b/c/d/#e`, `a/b/c/#e` | `a/b/c/d/#e`, `a/b/c/#e` | `a/b/c/#e` (shortest distance) ### Markdown anchors @@ -143,3 +172,14 @@ You don't want to change headings and make them redundant, like `## Arch: Instal ``` ...changing `arch` by `debian`, `gentoo`, etc. in the other pages. 
+ +--- + +You can also change the actual identifier of a heading, thanks again to the `attr_list` Markdown extension: + +```md +## Install from sources { #arch-install-src } +... +``` + +...though note that this will impact the URL anchor too (and therefore the permalink to the heading). diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 2b457ff..57b441a 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -15,9 +15,12 @@ import contextlib import functools import logging +import sys from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit +from mkdocs.config.base import Config +from mkdocs.config.config_options import Type from mkdocs.plugins import BasePlugin from mkdocs.structure.pages import Page @@ -37,6 +40,41 @@ log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] +# YORE: EOL 3.8: Remove block. +if sys.version_info < (3, 9): + from pathlib import PurePosixPath + + class URL(PurePosixPath): # noqa: D101 + def is_relative_to(self, *args: Any) -> bool: # noqa: D102 + try: + self.relative_to(*args) + except ValueError: + return False + return True +else: + from pathlib import PurePosixPath as URL # noqa: N814 + + +class AutorefsConfig(Config): + """Configuration options for the `autorefs` plugin.""" + + resolve_closest = Type(bool, default=False) + """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier. + + By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page". + + For example, if you link to identifier `hello` from page `foo/bar/`, + and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages, + autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`. + + If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs. 
+ If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found. + + When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL. + """ + + +class AutorefsPlugin(BasePlugin[AutorefsConfig]): """The `autorefs` plugin for `mkdocs`. This plugin defines the following event hooks: @@ -83,10 +121,44 @@ def register_url(self, identifier: str, url: str) -> None: """ self._abs_url_map[identifier] = url + @staticmethod + def _get_closest_url(from_url: str, urls: list[str]) -> str: + """Return the closest URL to the current page. + + Arguments: + from_url: The URL of the base page, from which we link towards the targeted pages. + urls: A list of URLs to choose from. + + Returns: + The closest URL to the current page. + """ + base_url = URL(from_url) + + while True: + if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]: + break + base_url = base_url.parent + if not base_url.name: + break + + if not candidates: + log.warning( + "Could not find closest URL (from %s, candidates: %s). " + "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).", + from_url, + urls, + ) + return urls[0] + + winner = candidates[0] if len(candidates) == 1 else min(candidates, key=lambda c: c.count("/")) + log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls) + return winner + def _get_item_url( self, identifier: str, fallback: Callable[[str], Sequence[str]] | None = None, + from_url: str | None = None, ) -> str: try: urls = self._url_map[identifier] @@ -103,6 +175,8 @@ def _get_item_url( raise if len(urls) > 1: + if self.config.resolve_closest and from_url is not None: + return self._get_closest_url(from_url, urls) log.warning( "Multiple URLs found for '%s': %s. 
" "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).", @@ -127,7 +201,7 @@ def get_item_url( Returns: A site-relative URL. """ - url = self._get_item_url(identifier, fallback) + url = self._get_item_url(identifier, fallback, from_url) if from_url is not None: parsed = urlsplit(url) if not parsed.scheme and not parsed.netloc: diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 8acd446..2a23655 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -60,3 +60,27 @@ def test_dont_make_relative_urls_relative_again() -> None: plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",)) == "../../foo/bar/baz.html#foo.bar.baz" ) + + +@pytest.mark.parametrize( + ("base", "urls", "expected"), + [ + # One URL is closest. + ("", ["x/#b", "#b"], "#b"), + # Several URLs are equally close. + ("a/b", ["x/#e", "a/c/#e", "a/d/#e"], "a/c/#e"), + ("a/b/", ["x/#e", "a/d/#e", "a/c/#e"], "a/d/#e"), + # Two close URLs, one is shorter (closer). + ("a/b", ["x/#e", "a/c/#e", "a/c/d/#e"], "a/c/#e"), + ("a/b/", ["x/#e", "a/c/d/#e", "a/c/#e"], "a/c/#e"), + # Deeper-nested URLs. + ("a/b/c", ["x/#e", "a/#e", "a/b/#e", "a/b/c/#e", "a/b/c/d/#e"], "a/b/c/#e"), + ("a/b/c/", ["x/#e", "a/#e", "a/b/#e", "a/b/c/d/#e", "a/b/c/#e"], "a/b/c/#e"), + # No closest URL, use first one even if longer distance. + ("a", ["b/c/#d", "c/#d"], "b/c/#d"), + ("a/", ["c/#d", "b/c/#d"], "c/#d"), + ], +) +def test_find_closest_url(base: str, urls: list[str], expected: str) -> None: + """Find closest URLs given a list of URLs.""" + assert AutorefsPlugin._get_closest_url(base, urls) == expected