Python-Markdown · waylan · Mar 12, 2024 · Nov 10, 2023 · Mar 8, 2024 · Mar 8, 2024
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
@@ -57,10 +57,10 @@ def _handle_word(s, t):
 
 
 _scanner = re.Scanner([
-    (r'[^ =]+=".*?"', _handle_double_quote),
-    (r"[^ =]+='.*?'", _handle_single_quote),
-    (r'[^ =]+=[^ =]+', _handle_key_value),
-    (r'[^ =]+', _handle_word),
+    (r'[^ =}]+=".*?"', _handle_double_quote),
+    (r"[^ =}]+='.*?'", _handle_single_quote),
+    (r'[^ =}]+=[^ =}]+', _handle_key_value),
+    (r'[^ =}]+', _handle_word),
     (r' ', None)
 ])
 
@@ -76,7 +76,7 @@ def isheader(elem: Element) -> bool:
 
 class AttrListTreeprocessor(Treeprocessor):
 
-    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
+    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
     HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
     BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
     INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
@@ -106,49 +106,62 @@ def run(self, doc: Element) -> None:
                         # use tail of last child. no `ul` or `ol`.
                         m = RE.search(elem[-1].tail)
                         if m:
-                            self.assign_attrs(elem, m.group(1))
-                            elem[-1].tail = elem[-1].tail[:m.start()]
+                            if not self.assign_attrs(elem, m.group(1), strict=True):
+                                elem[-1].tail = elem[-1].tail[:m.start()]
                     elif pos is not None and pos > 0 and elem[pos-1].tail:
                         # use tail of last child before `ul` or `ol`
                         m = RE.search(elem[pos-1].tail)
                         if m:
-                            self.assign_attrs(elem, m.group(1))
-                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
+                            if not self.assign_attrs(elem, m.group(1), strict=True):
+                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                     elif elem.text:
                         # use text. `ul` is first child.
                         m = RE.search(elem.text)
                         if m:
-                            self.assign_attrs(elem, m.group(1))
-                            elem.text = elem.text[:m.start()]
+                            if not self.assign_attrs(elem, m.group(1), strict=True):
+                                elem.text = elem.text[:m.start()]
                 elif len(elem) and elem[-1].tail:
                     # has children. Get from tail of last child
                     m = RE.search(elem[-1].tail)
                     if m:
-                        self.assign_attrs(elem, m.group(1))
-                        elem[-1].tail = elem[-1].tail[:m.start()]
-                        if isheader(elem):
-                            # clean up trailing #s
-                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
+                        if not self.assign_attrs(elem, m.group(1), strict=True):
+                            elem[-1].tail = elem[-1].tail[:m.start()]
+                            if isheader(elem):
+                                # clean up trailing #s
+                                elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                 elif elem.text:
                     # no children. Get from text.
                     m = RE.search(elem.text)
                     if m:
-                        self.assign_attrs(elem, m.group(1))
-                        elem.text = elem.text[:m.start()]
-                        if isheader(elem):
-                            # clean up trailing #s
-                            elem.text = elem.text.rstrip('#').rstrip()
+                        if not self.assign_attrs(elem, m.group(1), strict=True):
+                            elem.text = elem.text[:m.start()]
+                            if isheader(elem):
+                                # clean up trailing #s
+                                elem.text = elem.text.rstrip('#').rstrip()
             else:
                 # inline: check for `attrs` at start of tail
                 if elem.tail:
                     m = self.INLINE_RE.match(elem.tail)
                     if m:
-                        self.assign_attrs(elem, m.group(1))
-                        elem.tail = elem.tail[m.end():]
+                        remainder = self.assign_attrs(elem, m.group(1))
+                        elem.tail = elem.tail[m.end():] + remainder
+
+    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
+        """ Assign `attrs` to element.
+
+        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.
+
+        The `strict` argument controls whether to still assign attrs if there is a remaining `}`.
+        """
+        attrs, remainder = _scanner.scan(attrs_string)
+        # To keep historic behavior, discard all un-parseable text prior to '}'.
+        index = remainder.find('}')
+        remainder = remainder[index:] if index != -1 else ''
+
+        if strict and remainder:
+            return remainder
 
-    def assign_attrs(self, elem: Element, attrs: str) -> None:
-        """ Assign `attrs` to element. """
-        for k, v in get_attrs(attrs):
+        for k, v in attrs:
             if k == '.':
                 # add to class
                 cls = elem.get('class')
@@ -159,6 +172,8 @@ def assign_attrs(self, elem: Element, attrs: str) -> None:
             else:
                 # assign attribute `k` with `v`
                 elem.set(self.sanitize_name(k), v)
+        # The text that we initially over-matched will be put back.
+        return remainder
 
     def sanitize_name(self, name: str) -> str:
         """

diff --git a/tests/extensions/attr_list.html b/tests/extensions/attr_list.html
@@ -66,4 +66,9 @@ <h1>Bad attributes</h1>
 <p><em>More weirdness</em></p>
 <p>This should not cause a <em foo="a">crash</em></p>
 <p>Attr_lists do not contain <em>newlines</em>{ foo=bar
-key=value }</p>
+key=value }</p>
+<h1 data-test="{}">Attrs</h1>
+<p>attr_list values can have curly <em data-test="{hi{}" foo="bar">braces</em></p>
+<h2>attr_list curly needs to be at the end {.foo} hi</h2>
+<h2>attr_list curly needs to be at the end {.foo test=&rdquo;{&rdquo; } }</h2>
+<p><em class="a">Multiple</em> } <em class="b">items</em> inline</p>
diff --git a/tests/extensions/attr_list.txt b/tests/extensions/attr_list.txt
@@ -92,3 +92,13 @@ This should not cause a *crash*{ foo=a=b }
 
 Attr_lists do not contain *newlines*{ foo=bar
 key=value }
+
+# Attrs {data-test="{}"}
+
+attr_list values can have curly *braces*{ data-test='{hi{}' foo="bar" }
+
+## attr_list curly needs to be at the end {.foo} hi
+
+## attr_list curly needs to be at the end {.foo test="{" } }
+
+*Multiple*{.a} } *items*{.b} inline