Skip to content

Commit

Permalink
Clean up description via HTML escape
Browse files Browse the repository at this point in the history
Because the description is taken from the raw text of the document, it
can contain HTML reserved characters. This change escapes the
description string so those characters are emitted as HTML entities instead.
  • Loading branch information
markpeek committed Mar 21, 2024
1 parent 1da6c73 commit ed58790
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 0 deletions.
4 changes: 4 additions & 0 deletions sphinxext/opengraph/descriptionparser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import html
import string
from typing import Iterable

Expand Down Expand Up @@ -72,6 +73,9 @@ def dispatch_visit(self, node: nodes.Element) -> None:
if len(node.children) == 0:
text = node.astext().replace("\r", "").replace("\n", " ").strip()

# Ensure string contains HTML-safe characters
text = html.escape(text, True)

# Remove double spaces
while text.find(" ") != -1:
text = text.replace(" ", " ")
Expand Down
10 changes: 10 additions & 0 deletions tests/roots/test-meta-name-description-escape/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Sphinx configuration for the "meta-name-description-escape" test root.

# Load the extension under test.
extensions = ["sphinxext.opengraph"]

# Root document of the test project (index.rst in this directory).
master_doc = "index"
# Keep the build output directory out of the source scan.
exclude_patterns = ["_build"]

html_theme = "basic"

# Site URL setting read by the opengraph extension.
ogp_site_url = "http://example.org/en/latest/"

# NOTE(review): presumably makes the extension also emit a
# <meta name="description"> tag — confirm against the extension docs.
enable_meta_description = True
1 change: 1 addition & 0 deletions tests/roots/test-meta-name-description-escape/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Lorem <ipsum> dolor sit amet, "consectetur" adipiscing elit. Suspendisse at lorem ornare, fringilla massa nec, venenatis mi. Donec erat sapien, tincidunt nec rhoncus nec, scelerisque id diam. Orci varius natoque penatibus et magnis dis parturient mauris.
14 changes: 14 additions & 0 deletions tests/test_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ def get_tag_content(tags, tag_type, kind="property", prefix="og"):
return get_tag(tags, tag_type, kind, prefix).get("content", "")


def get_tag_content_text(tags, tag_type, kind="property", prefix="og"):
    """Return ``get_text("content", "")`` of the tag found by ``get_tag``.

    The tag is looked up with the same arguments as the sibling
    ``get_tag_content`` helper, which calls ``.get("content", "")`` on it
    instead.

    NOTE(review): if the tag is a BeautifulSoup ``Tag``, ``get_text`` takes
    ``(separator, strip)`` positionally, so ``"content"`` would act as a
    separator string rather than an attribute name — confirm this is the
    intended call.
    """
    matching_tag = get_tag(tags, tag_type, kind, prefix)
    return matching_tag.get_text("content", "")


def get_meta_description(tags):
return [tag for tag in tags if tag.get("name") == "description"][0].get(
"content", ""
Expand Down Expand Up @@ -39,6 +44,15 @@ def test_meta_name_description(meta_tags):
assert description == og_description


@pytest.mark.sphinx("html", testroot="meta-name-description-escape")
def test_meta_name_description_escape(meta_tags):
    """Check the og:description built from source text containing ``<``.

    The test root's index.rst contains ``<ipsum>`` and double-quoted text,
    so the generated description exercises HTML-reserved characters.

    This test has its own name on purpose: an earlier test in this module
    is already called ``test_meta_name_description``, and reusing that name
    would rebind it so the earlier test is never collected or run.
    """
    og_description = get_tag_content(meta_tags, "description")
    og_description_text = get_tag_content_text(meta_tags, "description")

    # The value read via get_tag_content is expected to contain the literal
    # "<", while the value read via get_tag_content_text must not.
    assert "<" in og_description
    assert "<" not in og_description_text


@pytest.mark.sphinx("html", testroot="meta-name-description-manual-description")
def test_meta_name_manual_description(meta_tags):
og_description = get_tag_content(meta_tags, "description")
Expand Down

0 comments on commit ed58790

Please sign in to comment.