From 9edba85fc14f034b7109534220702bf60178ff15 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 8 Mar 2024 09:44:38 -0500 Subject: [PATCH] Refactor abbr escaping An alternate fix to #1444. This does not exclude the use of carets (`^`) or square brackets in abbreviations. It still excludes backslashes, however. I played with backslashes and it just doesn't make sense to support them: they are excluded because they have special meaning in Markdown itself, not because of their use in regular expressions. --- docs/changelog.md | 3 +- docs/extensions/abbreviations.md | 10 ++--- markdown/extensions/abbr.py | 14 ++---- tests/test_syntax/extensions/test_abbr.py | 52 +++++++++++++++++------ 4 files changed, 46 insertions(+), 33 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index a71dbf25..9c2b302e 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -34,7 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Include `scripts/*.py` in the generated source tarballs (#1430). * Ensure lines after heading in loose list are properly detabbed (#1443). * Give smarty tree processor higher priority than toc (#1440). -* Explicitly omit carrot (`^`) and backslash (`\`) from abbreviations (#1444). +* Permit carrots (`^`) and square brackets (`]`) but explicitly exclude + backslashes (`\`) from abbreviations (#1444). ## [3.5.2] -- 2024-01-10 diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md index 9a98a91b..8a35e526 100644 --- a/docs/extensions/abbreviations.md +++ b/docs/extensions/abbreviations.md @@ -36,13 +36,9 @@ will be rendered as: is maintained by the W3C.

``` -The following three characters are not permitted in an abbreviation. Any -abbreviation definitions which include one will not be recognized as an -abbreviation definition. - -1. carrot (`^`) -2. backslash (`\`) -3. left square bracket (`]`) +The backslash (`\`) is not permitted in an abbreviation. Any abbreviation +definitions which include one or more backslashes between the square brackets +will not be recognized as an abbreviation definition. Usage ----- diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 46d3f35c..1c7185b2 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -41,7 +41,7 @@ def extendMarkdown(self, md): class AbbrPreprocessor(BlockProcessor): """ Abbreviation Preprocessor - parse text for abbr references. """ - RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE) + RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE) def test(self, parent: etree.Element, block: str) -> bool: return True @@ -72,16 +72,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool: return False def _generate_pattern(self, text: str) -> str: - """ - Given a string, returns a regex pattern to match that string. - - 'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)' - - Note: we force each char as a literal match via a character set (in brackets) - as we don't know what they will be beforehand. - - """ - return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)" + """ Given a string, returns a regex pattern to match that string. 
""" + return f"(?P<abbr>\\b{ re.escape(text) }\\b)" class AbbrInlineProcessor(InlineProcessor): diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py index 708af51b..e11e8d30 100644 --- a/tests/test_syntax/extensions/test_abbr.py +++ b/tests/test_syntax/extensions/test_abbr.py @@ -24,6 +24,7 @@ class TestAbbr(TestCase): + maxDiff = None default_kwargs = {'extensions': ['abbr']} @@ -260,28 +261,19 @@ def test_abbr_single_quoted(self): ) ) - def test_abbr_ignore_special_chars(self): + def test_abbr_ignore_backslash(self): self.assertMarkdownRenders( self.dedent( r""" - [^] [\\] [\]] []] + \\foo - *[^]: Not an abbreviation - - *[\\]: Not an abbreviation - - *[\]]: Not an abbreviation - - *[]]: Not an abbreviation + *[\\foo]: Not an abbreviation """ ), self.dedent( r""" - <p>[^] [\] []] []]</p> - <p>*[^]: Not an abbreviation</p> - <p>*[\]: Not an abbreviation</p> - <p>*[]]: Not an abbreviation</p> - <p>*[]]: Not an abbreviation</p> + <p>\foo</p> + <p>*[\foo]: Not an abbreviation</p> """ ) ) @@ -301,3 +293,35 @@ def test_abbr_hyphen(self): """ ) ) + + def test_abbr_carrot(self): + self.assertMarkdownRenders( + self.dedent( + """ + ABBR^abbr + + *[ABBR^abbr]: Abbreviation + """ + ), + self.dedent( + """ + <p><abbr title="Abbreviation">ABBR^abbr</abbr></p> + """ + ) + ) + + def test_abbr_bracket(self): + self.assertMarkdownRenders( + self.dedent( + """ + ABBR]abbr + + *[ABBR]abbr]: Abbreviation + """ + ), + self.dedent( + """ + <p><abbr title="Abbreviation">ABBR]abbr</abbr></p> + """ + ) + )