Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow attr_list quoted values to contain curly braces #1414

Merged
merged 9 commits (the target and source branch names were lost in extraction)
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 41 additions & 26 deletions markdown/extensions/attr_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ def _handle_word(s, t):


_scanner = re.Scanner([
(r'[^ =]+=".*?"', _handle_double_quote),
(r"[^ =]+='.*?'", _handle_single_quote),
(r'[^ =]+=[^ =]+', _handle_key_value),
(r'[^ =]+', _handle_word),
(r'[^ =}]+=".*?"', _handle_double_quote),
(r"[^ =}]+='.*?'", _handle_single_quote),
(r'[^ =}]+=[^ =}]+', _handle_key_value),
(r'[^ =}]+', _handle_word),
(r' ', None)
])

Expand All @@ -76,7 +76,7 @@ def isheader(elem: Element) -> bool:

class AttrListTreeprocessor(Treeprocessor):

BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
Expand Down Expand Up @@ -106,49 +106,62 @@ def run(self, doc: Element) -> None:
# use tail of last child. no `ul` or `ol`.
m = RE.search(elem[-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[-1].tail = elem[-1].tail[:m.start()]
if not self.assign_attrs(elem, m.group(1), strict=True):
elem[-1].tail = elem[-1].tail[:m.start()]
elif pos is not None and pos > 0 and elem[pos-1].tail:
# use tail of last child before `ul` or `ol`
m = RE.search(elem[pos-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
if not self.assign_attrs(elem, m.group(1), strict=True):
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
elif elem.text:
# use text. `ul` is first child.
m = RE.search(elem.text)
if m:
self.assign_attrs(elem, m.group(1))
elem.text = elem.text[:m.start()]
if not self.assign_attrs(elem, m.group(1), strict=True):
elem.text = elem.text[:m.start()]
elif len(elem) and elem[-1].tail:
# has children. Get from tail of last child
m = RE.search(elem[-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[-1].tail = elem[-1].tail[:m.start()]
if isheader(elem):
# clean up trailing #s
elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
if not self.assign_attrs(elem, m.group(1), strict=True):
elem[-1].tail = elem[-1].tail[:m.start()]
if isheader(elem):
# clean up trailing #s
elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
elif elem.text:
# no children. Get from text.
m = RE.search(elem.text)
if m:
self.assign_attrs(elem, m.group(1))
elem.text = elem.text[:m.start()]
if isheader(elem):
# clean up trailing #s
elem.text = elem.text.rstrip('#').rstrip()
if not self.assign_attrs(elem, m.group(1), strict=True):
elem.text = elem.text[:m.start()]
if isheader(elem):
# clean up trailing #s
elem.text = elem.text.rstrip('#').rstrip()
else:
# inline: check for `attrs` at start of tail
if elem.tail:
m = self.INLINE_RE.match(elem.tail)
if m:
self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():]
remainder = self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():] + remainder

def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
""" Assign `attrs` to element.

If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

The `strict` argument controls whether to still assign attrs if there is a remaining `}`.
"""
attrs, remainder = _scanner.scan(attrs_string)
# To keep historic behavior, discard all un-parseable text prior to '}'.
index = remainder.find('}')
remainder = remainder[index:] if index != -1 else ''

if strict and remainder:
return remainder

def assign_attrs(self, elem: Element, attrs: str) -> None:
""" Assign `attrs` to element. """
for k, v in get_attrs(attrs):
for k, v in attrs:
if k == '.':
# add to class
cls = elem.get('class')
Expand All @@ -159,6 +172,8 @@ def assign_attrs(self, elem: Element, attrs: str) -> None:
else:
# assign attribute `k` with `v`
elem.set(self.sanitize_name(k), v)
# The text that we initially over-matched will be put back.
return remainder

def sanitize_name(self, name: str) -> str:
"""
Expand Down
7 changes: 6 additions & 1 deletion tests/extensions/attr_list.html
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,9 @@ <h1>Bad attributes</h1>
<p><em>More weirdness</em></p>
<p>This should not cause a <em foo="a">crash</em></p>
<p>Attr_lists do not contain <em>newlines</em>{ foo=bar
key=value }</p>
key=value }</p>
<h1 data-test="{}">Attrs</h1>
<p>attr_list values can have curly <em data-test="{hi{}" foo="bar">braces</em></p>
<h2>attr_list curly needs to be at the end {.foo} hi</h2>
<h2>attr_list curly needs to be at the end {.foo test=&rdquo;{&rdquo; } }</h2>
<p><em class="a">Multiple</em> } <em class="b">items</em> inline</p>
10 changes: 10 additions & 0 deletions tests/extensions/attr_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,13 @@ This should not cause a *crash*{ foo=a=b }

Attr_lists do not contain *newlines*{ foo=bar
key=value }

# Attrs {data-test="{}"}

attr_list values can have curly *braces*{ data-test='{hi{}' foo="bar" }

## attr_list curly needs to be at the end {.foo} hi

## attr_list curly needs to be at the end {.foo test="{" } }

*Multiple*{.a} } *items*{.b} inline
Loading