From ed5879033282e3293a6e19534f0ab88381d7e4a0 Mon Sep 17 00:00:00 2001 From: Mark Peek Date: Thu, 21 Mar 2024 11:12:10 -0700 Subject: [PATCH] Clean up description via HTML escape As the description is taken from the raw text of the document it can contain some HTML reserved characters. This change will escape the description string to use HTML entities instead. --- sphinxext/opengraph/descriptionparser.py | 4 ++++ .../test-meta-name-description-escape/conf.py | 10 ++++++++++ .../test-meta-name-description-escape/index.rst | 1 + tests/test_options.py | 14 ++++++++++++++ 4 files changed, 29 insertions(+) create mode 100644 tests/roots/test-meta-name-description-escape/conf.py create mode 100644 tests/roots/test-meta-name-description-escape/index.rst diff --git a/sphinxext/opengraph/descriptionparser.py b/sphinxext/opengraph/descriptionparser.py index f8eea29..8583b65 100644 --- a/sphinxext/opengraph/descriptionparser.py +++ b/sphinxext/opengraph/descriptionparser.py @@ -1,3 +1,4 @@ +import html import string from typing import Iterable @@ -72,6 +73,9 @@ def dispatch_visit(self, node: nodes.Element) -> None: if len(node.children) == 0: text = node.astext().replace("\r", "").replace("\n", " ").strip() + # Ensure string contains HTML-safe characters + text = html.escape(text, True) + # Remove double spaces while text.find(" ") != -1: text = text.replace(" ", " ") diff --git a/tests/roots/test-meta-name-description-escape/conf.py b/tests/roots/test-meta-name-description-escape/conf.py new file mode 100644 index 0000000..b31eaac --- /dev/null +++ b/tests/roots/test-meta-name-description-escape/conf.py @@ -0,0 +1,10 @@ +extensions = ["sphinxext.opengraph"] + +master_doc = "index" +exclude_patterns = ["_build"] + +html_theme = "basic" + +ogp_site_url = "http://example.org/en/latest/" + +enable_meta_description = True diff --git a/tests/roots/test-meta-name-description-escape/index.rst b/tests/roots/test-meta-name-description-escape/index.rst new file mode 100644 index 
0000000..5c6c1c0 --- /dev/null +++ b/tests/roots/test-meta-name-description-escape/index.rst @@ -0,0 +1 @@ +Lorem dolor sit amet, "consectetur" adipiscing elit. Suspendisse at lorem ornare, fringilla massa nec, venenatis mi. Donec erat sapien, tincidunt nec rhoncus nec, scelerisque id diam. Orci varius natoque penatibus et magnis dis parturient mauris. diff --git a/tests/test_options.py b/tests/test_options.py index 529d133..ff64ce9 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -12,6 +12,11 @@ def get_tag_content(tags, tag_type, kind="property", prefix="og"): return get_tag(tags, tag_type, kind, prefix).get("content", "") +def get_tag_content_text(tags, tag_type, kind="property", prefix="og"): + # Gets the text (via get_text) of a specific ogp tag, as opposed to its "content" attribute + return get_tag(tags, tag_type, kind, prefix).get_text("content", "") + + def get_meta_description(tags): return [tag for tag in tags if tag.get("name") == "description"][0].get( "content", "" @@ -39,6 +44,15 @@ def test_meta_name_description(meta_tags): assert description == og_description +@pytest.mark.sphinx("html", testroot="meta-name-description-escape") +def test_meta_name_description_escape(meta_tags): + og_description = get_tag_content(meta_tags, "description") + og_description_text = get_tag_content_text(meta_tags, "description") + + assert '<' in og_description + assert '<' not in og_description_text + + @pytest.mark.sphinx("html", testroot="meta-name-description-manual-description") def test_meta_name_manual_description(meta_tags): og_description = get_tag_content(meta_tags, "description")