diff --git a/docs/changelog.md b/docs/changelog.md index 33f05ce8..8deaefd2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Include `scripts/*.py` in the generated source tarballs (#1430). * Ensure lines after heading in loose list are properly detabbed (#1443). * Give smarty tree processor higher priority than toc (#1440). +* Explicitly omit caret (`^`) and backslash (`\`) from abbreviations (#1444). ## [3.5.2] -- 2024-01-10 diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md index d03651f0..9a98a91b 100644 --- a/docs/extensions/abbreviations.md +++ b/docs/extensions/abbreviations.md @@ -36,6 +36,14 @@ will be rendered as: is maintained by the W3C.

``` +The following three characters are not permitted in an abbreviation. Any +abbreviation definitions which include one will not be recognized as an +abbreviation definition. + +1. caret (`^`) +2. backslash (`\`) +3. right square bracket (`]`) + Usage ----- diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 738368af..46d3f35c 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -41,7 +41,7 @@ def extendMarkdown(self, md): class AbbrPreprocessor(BlockProcessor): """ Abbreviation Preprocessor - parse text for abbr references. """ - RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE) + RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE) def test(self, parent: etree.Element, block: str) -> bool: return True @@ -73,18 +73,15 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool: def _generate_pattern(self, text: str) -> str: """ - Given a string, returns an regex pattern to match that string. + Given a string, returns a regex pattern to match that string. - 'HTML' -> r'(?P<abbr>[H][T][M][L])' + 'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)' - Note: we force each char as a literal match (in brackets) as we don't - know what they will be beforehand. + Note: we force each char as a literal match via a character set (in brackets) + as we don't know what they will be beforehand.
""" - chars = list(text) - for i in range(len(chars)): - chars[i] = r'[%s]' % chars[i] - return r'(?P<abbr>\b%s\b)' % (r''.join(chars)) + return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)" class AbbrInlineProcessor(InlineProcessor): diff --git a/tests/test_extensions.py b/tests/test_extensions.py index a9e789f1..c96772ff 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -85,35 +85,6 @@ def testConfigAsKwargsOnInit(self): self.assertEqual(ext.getConfigs(), {'foo': 'baz', 'bar': 'blah'}) -class TestAbbr(unittest.TestCase): - """ Test abbr extension. """ - - def setUp(self): - self.md = markdown.Markdown(extensions=['abbr']) - - def testSimpleAbbr(self): - """ Test Abbreviations. """ - text = 'Some text with an ABBR and a REF. Ignore REFERENCE and ref.' + \ - '\n\n*[ABBR]: Abbreviation\n' + \ - '*[REF]: Abbreviation Reference' - self.assertEqual( - self.md.convert(text), - '<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> ' - 'and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore ' - 'REFERENCE and ref.</p>' - ) - - def testNestedAbbr(self): - """ Test Nested Abbreviations. """ - text = '[ABBR](/foo) and _ABBR_\n\n' + \ - '*[ABBR]: Abbreviation' - self.assertEqual( - self.md.convert(text), - '<p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a> ' - 'and <em><abbr title="Abbreviation">ABBR</abbr></em></p>' - ) - - class TestMetaData(unittest.TestCase): """ Test `MetaData` extension. 
""" diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py index fbb25ffb..708af51b 100644 --- a/tests/test_syntax/extensions/test_abbr.py +++ b/tests/test_syntax/extensions/test_abbr.py @@ -95,6 +95,25 @@ def test_abbr_override(self): ) ) + def test_abbr_nested(self): + self.assertMarkdownRenders( + self.dedent( + """ + [ABBR](/foo) + + _ABBR_ + + *[ABBR]: Abbreviation + """ + ), + self.dedent( + """ + <p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a></p> + <p><em><abbr title="Abbreviation">ABBR</abbr></em></p> + """ + ) + ) + def test_abbr_no_blank_Lines(self): self.assertMarkdownRenders( self.dedent( @@ -240,3 +259,45 @@ def test_abbr_single_quoted(self): """ ) ) + + def test_abbr_ignore_special_chars(self): + self.assertMarkdownRenders( + self.dedent( + r""" + [^] [\\] [\]] []] + + *[^]: Not an abbreviation + + *[\\]: Not an abbreviation + + *[\]]: Not an abbreviation + + *[]]: Not an abbreviation + """ + ), + self.dedent( + r""" + <p>[^] [\] []] []]</p> + <p>*[^]: Not an abbreviation</p> + <p>*[\]: Not an abbreviation</p> + <p>*[]]: Not an abbreviation</p> + <p>*[]]: Not an abbreviation</p> + """ + ) + ) + + def test_abbr_hyphen(self): + self.assertMarkdownRenders( + self.dedent( + """ + ABBR-abbr + + *[ABBR-abbr]: Abbreviation + """ + ), + self.dedent( + """ + <p><abbr title="Abbreviation">ABBR-abbr</abbr></p> + """ + ) + )