From 5c35ebeffa33a8be0b635b9367115feeb95c8e96 Mon Sep 17 00:00:00 2001 From: Veronica Berglyd Olsen <1619840+vkbo@users.noreply.github.com> Date: Thu, 13 Jun 2024 01:00:01 +0200 Subject: [PATCH 1/2] Rewrite HTML tag insertion to check for correct open and close of tags (#1919) --- novelwriter/core/tohtml.py | 90 ++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/novelwriter/core/tohtml.py b/novelwriter/core/tohtml.py index ac336628f..d9221e291 100644 --- a/novelwriter/core/tohtml.py +++ b/novelwriter/core/tohtml.py @@ -37,28 +37,35 @@ logger = logging.getLogger(__name__) -HTML5_TAGS = { - Tokenizer.FMT_B_B: "", - Tokenizer.FMT_B_E: "", - Tokenizer.FMT_I_B: "", - Tokenizer.FMT_I_E: "", - Tokenizer.FMT_D_B: "", - Tokenizer.FMT_D_E: "", - Tokenizer.FMT_U_B: "", - Tokenizer.FMT_U_E: "", - Tokenizer.FMT_M_B: "", - Tokenizer.FMT_M_E: "", - Tokenizer.FMT_SUP_B: "", - Tokenizer.FMT_SUP_E: "", - Tokenizer.FMT_SUB_B: "", - Tokenizer.FMT_SUB_E: "", - Tokenizer.FMT_DL_B: "", - Tokenizer.FMT_DL_E: "", - Tokenizer.FMT_ADL_B: "", - Tokenizer.FMT_ADL_E: "", - Tokenizer.FMT_STRIP: "", +# Each opener tag, with the id of its corresponding closer and tag format +HTML_OPENER: dict[int, tuple[int, str]] = { + Tokenizer.FMT_B_B: (Tokenizer.FMT_B_E, ""), + Tokenizer.FMT_I_B: (Tokenizer.FMT_I_E, ""), + Tokenizer.FMT_D_B: (Tokenizer.FMT_D_E, ""), + Tokenizer.FMT_U_B: (Tokenizer.FMT_U_E, ""), + Tokenizer.FMT_M_B: (Tokenizer.FMT_M_E, ""), + Tokenizer.FMT_SUP_B: (Tokenizer.FMT_SUP_E, ""), + Tokenizer.FMT_SUB_B: (Tokenizer.FMT_SUB_E, ""), + Tokenizer.FMT_DL_B: (Tokenizer.FMT_DL_E, ""), + Tokenizer.FMT_ADL_B: (Tokenizer.FMT_ADL_E, ""), } +# Each closer tag, with the id of its corresponding opener and tag format +HTML_CLOSER: dict[int, tuple[int, str]] = { + Tokenizer.FMT_B_E: (Tokenizer.FMT_B_B, ""), + Tokenizer.FMT_I_E: (Tokenizer.FMT_I_B, ""), + Tokenizer.FMT_D_E: (Tokenizer.FMT_D_B, ""), + Tokenizer.FMT_U_E: (Tokenizer.FMT_U_B, ""), + Tokenizer.FMT_M_E: (Tokenizer.FMT_M_B, ""), + Tokenizer.FMT_SUP_E: (Tokenizer.FMT_SUP_B, ""), + Tokenizer.FMT_SUB_E: (Tokenizer.FMT_SUB_B, ""), + Tokenizer.FMT_DL_E: (Tokenizer.FMT_DL_B, ""), + Tokenizer.FMT_ADL_E: (Tokenizer.FMT_ADL_B, ""), +} + +# Empty HTML tag record +HTML_NONE = (0, "") + class ToHtml(Tokenizer): """Core: HTML Document Writer @@ -447,19 +454,46 @@ def getStyleSheet(self) -> list[str]: def _formatText(self, text: str, tFmt: T_Formats) -> str: """Apply formatting tags to text.""" temp = text - for pos, fmt, data in reversed(tFmt): - html = "" - if fmt == self.FMT_FNOTE: + + # Build a list of all html tags that need to be inserted in the text. + # This is done in the forward direction, and a tag is only opened if it + # isn't already open, and only closed if it has previously been opened. + tags: list[tuple[int, str]] = [] + state = dict.fromkeys(HTML_OPENER, False) + for pos, fmt, data in tFmt: + if m := HTML_OPENER.get(fmt): + if not state.get(fmt, True): + tags.append((pos, m[1])) + state[fmt] = True + elif m := HTML_CLOSER.get(fmt): + if state.get(m[0], False): + tags.append((pos, m[1])) + state[m[0]] = False + elif fmt == self.FMT_FNOTE: if data in self._footnotes: index = len(self._usedNotes) + 1 self._usedNotes[data] = index - html = f"{index}" + tags.append((pos, f"{index}")) else: - html = "ERR" - else: - html = HTML5_TAGS.get(fmt, "") - temp = f"{temp[:pos]}{html}{temp[pos:]}" + tags.append((pos, "ERR")) + + # Check all format types and close any tag that is still open. This + # ensures that unclosed tags don't spill over to the next paragraph. + end = len(text) + for opener, active in state.items(): + if active: + closer = HTML_OPENER.get(opener, HTML_NONE)[0] + tags.append((end, HTML_CLOSER.get(closer, HTML_NONE)[1])) + + # Insert all tags at their correct position, starting from the back. + # The reverse order ensures that the positions are not shifted while we + # insert tags. + for pos, tag in reversed(tags): + temp = f"{temp[:pos]}{tag}{temp[pos:]}" + + # Replace all line breaks with proper HTML break tags temp = temp.replace("\n", "
") + return stripEscape(temp) def _formatSynopsis(self, text: str, synopsis: bool) -> str: From babc39c6a2a6be8635de55e7f5e6fe91ee44b974 Mon Sep 17 00:00:00 2001 From: Veronica Berglyd Olsen <1619840+vkbo@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:48:09 +0200 Subject: [PATCH 2/2] Add test coverage --- tests/test_core/test_core_tohtml.py | 40 +++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/test_core/test_core_tohtml.py b/tests/test_core/test_core_tohtml.py index 9e150295c..7974cef4d 100644 --- a/tests/test_core/test_core_tohtml.py +++ b/tests/test_core/test_core_tohtml.py @@ -275,12 +275,12 @@ def testCoreToHtml_ConvertParagraphs(mockGUI): CONFIG.altDialogOpen = "::" CONFIG.altDialogClose = "::" html.setDialogueHighlight(True) - html._text = "## Chapter\n\nThis text :: has alt dialogue :: in it.\n\n" + html._text = "## Chapter\n\nThis text ::has alt dialogue:: in it.\n\n" html.tokenizeText() html.doConvert() assert html.result == ( "

Chapter

\n" - "

This text :: has alt dialogue :: in it.

\n" + "

This text ::has alt dialogue:: in it.

\n" ) # Footnotes @@ -308,6 +308,42 @@ def testCoreToHtml_ConvertParagraphs(mockGUI): ) +@pytest.mark.core +def testCoreToHtml_CloseTags(mockGUI): + """Test automatic closing of HTML tags for shortcodes.""" + project = NWProject() + html = ToHtml(project) + + html._isNovel = True + html._isFirst = True + + # Unclosed Shortcodes + html._text = "Text [b][i][s][u][m][sup][sub]text text text.\n" + html.tokenizeText() + html.doConvert() + assert html.result == ( + "

Text " + "text text text.

\n" + ) + + # Double Shortcodes + html._text = "Text [b][i][s][u][m][sup][sub]text [b][i][s][u][m][sup][sub]text text.\n" + html.tokenizeText() + html.doConvert() + assert html.result == ( + "

Text " + "text text text.

\n" + ) + + # Redundant Close Shortcodes + html._text = "Text text [/b][/i][/s][/u][/m][/sup][/sub]text text.\n" + html.tokenizeText() + html.doConvert() + assert html.result == ( + "

Text text text text.

\n" + ) + + @pytest.mark.core def testCoreToHtml_ConvertDirect(mockGUI): """Test the converter directly using the ToHtml class."""