Skip to content

Commit

Permalink
♻️ Parse entities to text_special token (#280)
Browse files Browse the repository at this point in the history
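Entities are now parsed into a `text_special` token rather than being appended directly to the pending text.
The `text_join` core rule joins that token back into the surrounding text later, but only after the typographic rules have been applied.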

Implements upstream: https://github.com/markdown-it/markdown-it/commit/3fc0deb38b5a8b2eb8f46c727cc4e299e5ae5f9c
  • Loading branch information
chrisjsewell authored Jun 2, 2023
1 parent ea27cc8 commit d5b3874
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 32 deletions.
67 changes: 35 additions & 32 deletions markdown_it/rules_inline/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,38 @@ def entity(state: StateInline, silent: bool) -> bool:
if state.src[pos] != "&":
return False

if (pos + 1) < maximum:
if state.src[pos + 1] == "#":
match = DIGITAL_RE.search(state.src[pos:])
if match:
if not silent:
match1 = match.group(1)
code = (
int(match1[1:], 16)
if match1[0].lower() == "x"
else int(match1, 10)
)
state.pending += (
fromCodePoint(code)
if isValidEntityCode(code)
else fromCodePoint(0xFFFD)
)

state.pos += len(match.group(0))
return True

else:
match = NAMED_RE.search(state.src[pos:])
if match and match.group(1) in entities:
if not silent:
state.pending += entities[match.group(1)]
state.pos += len(match.group(0))
return True

if not silent:
state.pending += "&"
state.pos += 1
return True
if pos + 1 >= maximum:
return False

if state.src[pos + 1] == "#":
if match := DIGITAL_RE.search(state.src[pos:]):
if not silent:
match1 = match.group(1)
code = (
int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10)
)

token = state.push("text_special", "", 0)
token.content = (
fromCodePoint(code)
if isValidEntityCode(code)
else fromCodePoint(0xFFFD)
)
token.markup = match.group(0)
token.info = "entity"

state.pos += len(match.group(0))
return True

else:
if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities:
if not silent:
token = state.push("text_special", "", 0)
token.content = entities[match.group(1)]
token.markup = match.group(0)
token.info = "entity"

state.pos += len(match.group(0))
return True

return False
13 changes: 13 additions & 0 deletions tests/test_port/fixtures/smartquotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,16 @@ Should be escapable:
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
.

Should not replace entities:
.
&quot;foo&quot;

&quot;foo"

"foo&quot;
.
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
.
7 changes: 7 additions & 0 deletions tests/test_port/fixtures/typographer.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,10 @@ regression tests for #624
<p>1–2–3</p>
<p>1 – – 3</p>
.

shouldn't replace entities
.
&#40;c) (c&#41; (c)
.
<p>(c) (c) ©</p>
.

0 comments on commit d5b3874

Please sign in to comment.