From a63e6f3416f2e4fc7c2d5edb2768ceed9218df20 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 3 Nov 2023 15:20:00 +0100 Subject: [PATCH] Fix edge-case crash in InlineProcessor If an inlineprocessor returns an AtomicString (even though that is pointless, a plain string is atomic in that context), there can be an exception in 2 separate places. The added test case was crashing before this change. --- docs/changelog.md | 1 + markdown/treeprocessors.py | 4 ++-- tests/test_apis.py | 25 +++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index c55c9dda..84f0bfaa 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Remove legacy import needed only in Python 2 (#1403) * Fix typo that left the attribute `AdmonitionProcessor.content_indent` unset (#1404) +* Fix edge-case crash in `InlineProcessor` with `AtomicString` (#1406). * Fix edge-case crash in `codehilite` with an empty `code` tag (#1405). * Improve and expand type annotations in the code base (#1401). diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index dc857204..83630999 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -218,7 +218,7 @@ def linkText(text: str | None) -> None: text = data[strartIndex:index] linkText(text) - if not isString(node): # it's Element + if not isinstance(node, str): # it's Element for child in [node] + list(node): if child.tail: if child.tail.strip(): @@ -304,7 +304,7 @@ def __applyPattern( if node is None: return data, True, end - if not isString(node): + if not isinstance(node, str): if not isinstance(node.text, util.AtomicString): # We need to process current node too for child in [node] + list(node): diff --git a/tests/test_apis.py b/tests/test_apis.py index d613a822..55e2cdb6 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -30,6 +30,7 @@ import markdown import warnings from markdown.__main__ import parse_options +from markdown import inlinepatterns from logging import DEBUG, WARNING, CRITICAL import yaml import tempfile @@ -664,8 +665,8 @@ class testAtomicString(unittest.TestCase): """ Test that `AtomicStrings` are honored (not parsed). """ def setUp(self): - md = markdown.Markdown() - self.inlineprocessor = md.treeprocessors['inline'] + self.md = markdown.Markdown() + self.inlineprocessor = self.md.treeprocessors['inline'] def testString(self): """ Test that a regular string is parsed. """ @@ -710,6 +711,26 @@ def testNestedAtomicString(self): '*to* *test* *with*

' ) + def testInlineProcessorDoesntCrashWithWrongAtomicString(self): + """ Test that an `AtomicString` returned from a Pattern doesn't cause a crash. """ + tree = etree.Element('div') + p = etree.SubElement(tree, 'p') + p.text = 'a marker c' + self.md.inlinePatterns.register( + _InlineProcessorThatReturnsAtomicString(r'marker', self.md), 'test', 100 + ) + new = self.inlineprocessor.run(tree) + self.assertEqual( + markdown.serializers.to_html_string(new), + '

a <b>atomic</b> c

' + ) + + +class _InlineProcessorThatReturnsAtomicString(inlinepatterns.InlineProcessor): + """ Return a simple text of `group(1)` of a Pattern. """ + def handleMatch(self, m, data): + return markdown.util.AtomicString('atomic'), m.start(0), m.end(0) + class TestConfigParsing(unittest.TestCase): def assertParses(self, value, result):