Clean up description via HTML escape

Because the description is taken from the raw text of the document, it can contain HTML reserved characters (such as `<`, `>`, `&`, and quotes). This change escapes the description string so that those characters are emitted as HTML entities instead.
wpilibsuite · Mar 21, 2024 · ed58790 · ed58790
1 parent 1da6c73
commit ed58790
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 0 deletions.
diff --git a/sphinxext/opengraph/descriptionparser.py b/sphinxext/opengraph/descriptionparser.py
@@ -1,3 +1,4 @@
+import html
 import string
 from typing import Iterable
 
@@ -72,6 +73,9 @@ def dispatch_visit(self, node: nodes.Element) -> None:
         if len(node.children) == 0:
             text = node.astext().replace("\r", "").replace("\n", " ").strip()
 
+            # Ensure string contains HTML-safe characters
+            text = html.escape(text, True)
+
             # Remove double spaces
             while text.find("  ") != -1:
                 text = text.replace("  ", " ")

diff --git a/tests/roots/test-meta-name-description-escape/conf.py b/tests/roots/test-meta-name-description-escape/conf.py
@@ -0,0 +1,10 @@
+extensions = ["sphinxext.opengraph"]
+
+master_doc = "index"
+exclude_patterns = ["_build"]
+
+html_theme = "basic"
+
+ogp_site_url = "http://example.org/en/latest/"
+
+enable_meta_description = True
diff --git a/tests/roots/test-meta-name-description-escape/index.rst b/tests/roots/test-meta-name-description-escape/index.rst
@@ -0,0 +1 @@
+Lorem <ipsum> dolor sit amet, "consectetur" adipiscing elit. Suspendisse at lorem ornare, fringilla massa nec, venenatis mi. Donec erat sapien, tincidunt nec rhoncus nec, scelerisque id diam. Orci varius natoque penatibus et magnis dis parturient mauris.
diff --git a/tests/test_options.py b/tests/test_options.py
@@ -12,6 +12,11 @@ def get_tag_content(tags, tag_type, kind="property", prefix="og"):
     return get_tag(tags, tag_type, kind, prefix).get("content", "")
 
 
+def get_tag_content_text(tags, tag_type, kind="property", prefix="og"):
+    # Returns get_text() of a specific ogp tag, not its "content" attribute. NOTE(review): if get_tag yields a bs4 Tag, "content" is only the join separator here - confirm intent
+    return get_tag(tags, tag_type, kind, prefix).get_text("content", "")
+
+
 def get_meta_description(tags):
     return [tag for tag in tags if tag.get("name") == "description"][0].get(
         "content", ""
@@ -39,6 +44,15 @@ def test_meta_name_description(meta_tags):
     assert description == og_description
 
 
+@pytest.mark.sphinx("html", testroot="meta-name-description-escape")
+def test_meta_name_description_escape(meta_tags):
+    # Distinct name on purpose: redefining test_meta_name_description would shadow the earlier test so it never runs.
+    og_description = get_tag_content(meta_tags, "description")
+    og_description_text = get_tag_content_text(meta_tags, "description")
+    assert "<" in og_description
+    assert "<" not in og_description_text
+
+
 @pytest.mark.sphinx("html", testroot="meta-name-description-manual-description")
 def test_meta_name_manual_description(meta_tags):
     og_description = get_tag_content(meta_tags, "description")