diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index 0dafa2d6..5fefe028 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -91,25 +91,24 @@ def fromCodePoint(c: int) -> str: r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});", re.IGNORECASE, ) -DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})") -DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE) +DIGITAL_ENTITY_TEST_RE = re.compile( + r"^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$", re.IGNORECASE +) def replaceEntityPattern(match: str, name: str) -> str: """Convert HTML entity patterns, see https://spec.commonmark.org/0.30/#entity-references """ + code = 0 + if name in entities: return entities[name] - code: None | int = None - if pat := DIGITAL_ENTITY_BASE10_RE.fullmatch(name): - code = int(pat.group(1), 10) - elif pat := DIGITAL_ENTITY_BASE16_RE.fullmatch(name): - code = int(pat.group(1), 16) - - if code is not None and isValidEntityCode(code): - return fromCodePoint(code) + if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name): + code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10) + if isValidEntityCode(code): + return fromCodePoint(code) return match diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index f42b2244..ed758dfc 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -15,9 +15,8 @@ def __init__(self) -> None: self.str = "" -def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: - lines = 0 - start = pos +def parseLinkDestination(string: str, start: int, maximum: int) -> _Result: + pos = start result = _Result() if charCodeAt(string, pos) == 0x3C: # /* < */ @@ -80,7 +79,6 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: return result result.str = unescapeAll(string[start:pos]) - result.lines = lines result.pos = pos result.ok = True return result diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index fe23ea71..ba82adc3 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -16,9 +16,9 @@ def __str__(self) -> str: return self.str -def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result: +def parseLinkTitle(string: str, start: int, maximum: int) -> _Result: lines = 0 - start = pos + pos = start result = _Result() if pos >= maximum: diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index 3c4d4019..1458f17d 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -58,6 +58,7 @@ def __init__(self) -> None: def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None: """Generate tokens for input range.""" + ok = False rules = self.ruler.getRules("") line = startLine maxNesting = state.md.options.maxNesting @@ -82,10 +83,19 @@ def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None: # - update `state.line` # - update `state.tokens` # - return True + prevLine = state.line + for rule in rules: - if rule(state, line, endLine, False): + ok = rule(state, line, endLine, False) + if ok: + if prevLine >= state.line: + raise Exception("block rule didn't increment state.line") break + # this can only happen if user disables paragraph rule + if not ok: + raise Exception("none of the block rules matched") + # set state.tight if we had an empty line before current tag # i.e. latest empty line should not count state.tight = not hasEmptyLines diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py index 8f3ac1e6..4fae6722 100644 --- a/markdown_it/parser_inline.py +++ b/markdown_it/parser_inline.py @@ -84,6 +84,8 @@ def skipToken(self, state: StateInline) -> None: ok = rule(state, True) state.level -= 1 if ok: + if pos >= state.pos: + raise Exception("inline rule didn't increment state.pos") break else: # Too much nesting, just skip until the end of the paragraph. @@ -117,11 +119,14 @@ def tokenize(self, state: StateInline) -> None: # - update `state.pos` # - update `state.tokens` # - return true + prevPos = state.pos if state.level < maxNesting: for rule in rules: ok = rule(state, False) if ok: + if prevPos >= state.pos: + raise Exception("inline rule didn't increment state.pos") break if ok: diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 3e289e9e..60940d33 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -1,7 +1,7 @@ - package: markdown-it/markdown-it version: 13.0.1 - commit: e843acc9edad115cbf8cf85e676443f01658be08 - date: May 3, 2022 + commit: 49ca65bbef067c7dba63468a48c4aee3048607dc + date: Sep 26, 2023 notes: - Rename variables that use python built-in names, e.g. - `max` -> `maximum` diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py index 6d60589a..b69329ea 100644 --- a/markdown_it/renderer.py +++ b/markdown_it/renderer.py @@ -77,11 +77,12 @@ def render( result = "" for i, token in enumerate(tokens): - if token.type == "inline": + type = token.type + if type == "inline": if token.children: result += self.renderInline(token.children, options, env) - elif token.type in self.rules: - result += self.rules[token.type](tokens, i, options, env) + elif type in self.rules: + result += self.rules[type](tokens, i, options, env) else: result += self.renderToken(tokens, i, options, env) @@ -217,7 +218,7 @@ def code_inline( "" - + escapeHtml(tokens[idx].content) + + escapeHtml(token.content) + "" ) diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 0c9081b9..d8b7c493 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -27,81 +27,16 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> return False except IndexError: return False - pos += 1 # we know that it's going to be a valid blockquote, # so no point trying to find the end of it in silent mode if silent: return True - # set offset past spaces and ">" - initial = offset = state.sCount[startLine] + 1 - - try: - second_char: str | None = state.src[pos] - except IndexError: - second_char = None - - # skip one optional space after '>' - if second_char == " ": - # ' > test ' - # ^ -- position start of line here: - pos += 1 - initial += 1 - offset += 1 - adjustTab = False - spaceAfterMarker = True - elif second_char == "\t": - spaceAfterMarker = True - - if (state.bsCount[startLine] + offset) % 4 == 3: - # ' >\t test ' - # ^ -- position start of line here (tab has width==1) - pos += 1 - initial += 1 - offset += 1 - adjustTab = False - else: - # ' >\t test ' - # ^ -- position start of line here + shift bsCount slightly - # to make extra space appear - adjustTab = True - - else: - spaceAfterMarker = False - - oldBMarks = [state.bMarks[startLine]] - state.bMarks[startLine] = pos - - while pos < max: - ch = state.src[pos] - - if isStrSpace(ch): - if ch == "\t": - offset += ( - 4 - - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4 - ) - else: - offset += 1 - - else: - break - - pos += 1 - - oldBSCount = [state.bsCount[startLine]] - state.bsCount[startLine] = ( - state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0) - ) - - lastLineEmpty = pos >= max - - oldSCount = [state.sCount[startLine]] - state.sCount[startLine] = offset - initial - - oldTShift = [state.tShift[startLine]] - state.tShift[startLine] = pos - state.bMarks[startLine] + oldBMarks = [] + oldBSCount = [] + oldSCount = [] + oldTShift = [] terminatorRules = state.md.block.ruler.getRules("blockquote") @@ -127,8 +62,8 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # - - - # ``` - # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) { - nextLine = startLine + 1 + # for (nextLine = startLine; nextLine < endLine; nextLine++) { + nextLine = startLine while nextLine < endLine: # check if it's outdented, i.e. it's inside list item and indented # less than said list item: @@ -153,7 +88,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # This line is inside the blockquote. # set offset past spaces and ">" - initial = offset = state.sCount[nextLine] + 1 + initial = state.sCount[nextLine] + 1 try: next_char: str | None = state.src[pos] @@ -166,18 +101,16 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # ^ -- position start of line here: pos += 1 initial += 1 - offset += 1 adjustTab = False spaceAfterMarker = True elif next_char == "\t": spaceAfterMarker = True - if (state.bsCount[nextLine] + offset) % 4 == 3: + if (state.bsCount[nextLine] + initial) % 4 == 3: # ' >\t test ' # ^ -- position start of line here (tab has width==1) pos += 1 initial += 1 - offset += 1 adjustTab = False else: # ' >\t test ' @@ -188,6 +121,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> else: spaceAfterMarker = False + offset = initial oldBMarks.append(state.bMarks[nextLine]) state.bMarks[nextLine] = pos diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index d8070d74..6b5ca932 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -101,10 +101,11 @@ def markTightParagraphs(state: StateBlock, idx: int) -> None: def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent) + nextLine = startLine isTerminatingParagraph = False tight = True - if state.is_code_block(startLine): + if state.is_code_block(nextLine): return False # Special case: @@ -115,8 +116,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # - this one is a paragraph continuation if ( state.listIndent >= 0 - and state.sCount[startLine] - state.listIndent >= 4 - and state.sCount[startLine] < state.blkIndent + and state.sCount[nextLine] - state.listIndent >= 4 + and state.sCount[nextLine] < state.blkIndent ): return False @@ -130,15 +131,15 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if ( silent and state.parentType == "paragraph" - and state.sCount[startLine] >= state.blkIndent + and state.sCount[nextLine] >= state.blkIndent ): isTerminatingParagraph = True # Detect list type and position after marker - posAfterMarker = skipOrderedListMarker(state, startLine) + posAfterMarker = skipOrderedListMarker(state, nextLine) if posAfterMarker >= 0: isOrdered = True - start = state.bMarks[startLine] + state.tShift[startLine] + start = state.bMarks[nextLine] + state.tShift[nextLine] markerValue = int(state.src[start : posAfterMarker - 1]) # If we're starting a new ordered list right after @@ -146,7 +147,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if isTerminatingParagraph and markerValue != 1: return False else: - posAfterMarker = skipBulletListMarker(state, startLine) + posAfterMarker = skipBulletListMarker(state, nextLine) if posAfterMarker >= 0: isOrdered = False else: @@ -156,17 +157,17 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # a paragraph, first line should not be empty. if ( isTerminatingParagraph - and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine] + and state.skipSpaces(posAfterMarker) >= state.eMarks[nextLine] ): return False - # We should terminate list on style change. Remember first one to compare. - markerChar = state.src[posAfterMarker - 1] - # For validation mode we can terminate immediately if silent: return True + # We should terminate list on style change. Remember first one to compare. + markerChar = state.src[posAfterMarker - 1] + # Start list listTokIdx = len(state.tokens) @@ -178,14 +179,13 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> else: token = state.push("bullet_list_open", "ul", 1) - token.map = listLines = [startLine, 0] + token.map = listLines = [nextLine, 0] token.markup = markerChar # # Iterate list items # - nextLine = startLine prevEmptyEnd = False terminatorRules = state.md.block.ruler.getRules("list") @@ -199,7 +199,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = ( state.sCount[nextLine] + posAfterMarker - - (state.bMarks[startLine] + state.tShift[startLine]) + - (state.bMarks[nextLine] + state.tShift[nextLine]) ) while pos < maximum: @@ -231,14 +231,14 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Run subparser & write tokens token = state.push("list_item_open", "li", 1) token.markup = markerChar - token.map = itemLines = [startLine, 0] + token.map = itemLines = [nextLine, 0] if isOrdered: token.info = state.src[start : posAfterMarker - 1] # change current state, then restore it after parser subcall oldTight = state.tight - oldTShift = state.tShift[startLine] - oldSCount = state.sCount[startLine] + oldTShift = state.tShift[nextLine] + oldSCount = state.sCount[nextLine] # - example list # ^ listIndent position will be here @@ -249,10 +249,10 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.blkIndent = indent state.tight = True - state.tShift[startLine] = contentStart - state.bMarks[startLine] - state.sCount[startLine] = offset + state.tShift[nextLine] = contentStart - state.bMarks[nextLine] + state.sCount[nextLine] = offset - if contentStart >= maximum and state.isEmpty(startLine + 1): + if contentStart >= maximum and state.isEmpty(nextLine + 1): # workaround for this case # (list item is empty, list terminates before "foo"): # ~~~~~~~~ @@ -263,9 +263,9 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.line = min(state.line + 2, endLine) else: # NOTE in list.js this was: - # state.md.block.tokenize(state, startLine, endLine, True) - # but tokeniz does not take the final parameter - state.md.block.tokenize(state, startLine, endLine) + # state.md.block.tokenize(state, nextLine, endLine, True) + # but tokenize does not take the final parameter + state.md.block.tokenize(state, nextLine, endLine) # If any of list item is tight, mark list as tight if (not state.tight) or prevEmptyEnd: @@ -273,32 +273,30 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Item become loose if finish with empty line, # but we should filter last element, because it means list finish - prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1) + prevEmptyEnd = (state.line - nextLine) > 1 and state.isEmpty(state.line - 1) state.blkIndent = state.listIndent state.listIndent = oldListIndent - state.tShift[startLine] = oldTShift - state.sCount[startLine] = oldSCount + state.tShift[nextLine] = oldTShift + state.sCount[nextLine] = oldSCount state.tight = oldTight token = state.push("list_item_close", "li", -1) token.markup = markerChar - nextLine = startLine = state.line + nextLine = state.line itemLines[1] = nextLine if nextLine >= endLine: break - contentStart = state.bMarks[startLine] - # # Try to check if list is terminated or continued. # if state.sCount[nextLine] < state.blkIndent: break - if state.is_code_block(startLine): + if state.is_code_block(nextLine): break # fail if terminating block found diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py index 30ba8777..cfdaf222 100644 --- a/markdown_it/rules_block/paragraph.py +++ b/markdown_it/rules_block/paragraph.py @@ -15,7 +15,6 @@ def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> nextLine = startLine + 1 ruler = state.md.block.ruler terminatorRules = ruler.getRules("paragraph") - endLine = state.lineMax oldParentType = state.parentType state.parentType = "paragraph" diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index fc60d6b1..4cad9de1 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -29,7 +29,7 @@ def backtick(state: StateInline, silent: bool) -> bool: state.pos += openerLength return True - matchStart = matchEnd = pos + matchEnd = pos # Nothing found in the cache, scan until the end of the line (or until marker is found) while True: diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py index 9c63b27f..c3a968e6 100644 --- a/markdown_it/rules_inline/balance_pairs.py +++ b/markdown_it/rules_inline/balance_pairs.py @@ -5,7 +5,7 @@ from .state_inline import Delimiter, StateInline -def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None: +def processDelimiters(delimiters: list[Delimiter]) -> None: """For each opening emphasis-like marker find a matching closing one.""" if not delimiters: return @@ -128,11 +128,11 @@ def link_pairs(state: StateInline) -> None: tokens_meta = state.tokens_meta maximum = len(state.tokens_meta) - processDelimiters(state, state.delimiters) + processDelimiters(state.delimiters) curr = 0 while curr < maximum: curr_meta = tokens_meta[curr] if curr_meta and "delimiters" in curr_meta: - processDelimiters(state, curr_meta["delimiters"]) + processDelimiters(curr_meta["delimiters"]) curr += 1 diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 9065e1d0..a2a73bb9 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -32,7 +32,7 @@ def html_inline(state: StateInline, silent: bool) -> bool: if not silent: token = state.push("html_inline", "", 0) - token.content = state.src[pos : pos + len(match.group(0))] + token.content = match.group(0) if isLinkOpen(token.content): state.linkLevel += 1