diff --git a/novelwriter/core/tohtml.py b/novelwriter/core/tohtml.py
index ac336628f..d9221e291 100644
--- a/novelwriter/core/tohtml.py
+++ b/novelwriter/core/tohtml.py
@@ -37,28 +37,35 @@
logger = logging.getLogger(__name__)
-HTML5_TAGS = {
- Tokenizer.FMT_B_B: "",
- Tokenizer.FMT_B_E: "",
- Tokenizer.FMT_I_B: "",
- Tokenizer.FMT_I_E: "",
- Tokenizer.FMT_D_B: "",
- Tokenizer.FMT_D_E: "",
- Tokenizer.FMT_U_B: "",
- Tokenizer.FMT_U_E: "",
- Tokenizer.FMT_M_B: "",
- Tokenizer.FMT_M_E: "",
- Tokenizer.FMT_SUP_B: "",
- Tokenizer.FMT_SUP_E: "",
- Tokenizer.FMT_SUB_B: "",
- Tokenizer.FMT_SUB_E: "",
- Tokenizer.FMT_DL_B: "",
- Tokenizer.FMT_DL_E: "",
- Tokenizer.FMT_ADL_B: "",
- Tokenizer.FMT_ADL_E: "",
- Tokenizer.FMT_STRIP: "",
+# Each opener tag, with the id of its corresponding closer and tag format
+HTML_OPENER: dict[int, tuple[int, str]] = {
+ Tokenizer.FMT_B_B: (Tokenizer.FMT_B_E, ""),
+ Tokenizer.FMT_I_B: (Tokenizer.FMT_I_E, ""),
+ Tokenizer.FMT_D_B: (Tokenizer.FMT_D_E, ""),
+ Tokenizer.FMT_U_B: (Tokenizer.FMT_U_E, ""),
+ Tokenizer.FMT_M_B: (Tokenizer.FMT_M_E, ""),
+ Tokenizer.FMT_SUP_B: (Tokenizer.FMT_SUP_E, ""),
+ Tokenizer.FMT_SUB_B: (Tokenizer.FMT_SUB_E, ""),
+ Tokenizer.FMT_DL_B: (Tokenizer.FMT_DL_E, ""),
+ Tokenizer.FMT_ADL_B: (Tokenizer.FMT_ADL_E, ""),
}
+# Each closer tag, with the id of its corresponding opener and tag format
+HTML_CLOSER: dict[int, tuple[int, str]] = {
+ Tokenizer.FMT_B_E: (Tokenizer.FMT_B_B, ""),
+ Tokenizer.FMT_I_E: (Tokenizer.FMT_I_B, ""),
+ Tokenizer.FMT_D_E: (Tokenizer.FMT_D_B, ""),
+ Tokenizer.FMT_U_E: (Tokenizer.FMT_U_B, ""),
+ Tokenizer.FMT_M_E: (Tokenizer.FMT_M_B, ""),
+ Tokenizer.FMT_SUP_E: (Tokenizer.FMT_SUP_B, ""),
+ Tokenizer.FMT_SUB_E: (Tokenizer.FMT_SUB_B, ""),
+ Tokenizer.FMT_DL_E: (Tokenizer.FMT_DL_B, ""),
+ Tokenizer.FMT_ADL_E: (Tokenizer.FMT_ADL_B, ""),
+}
+
+# Empty HTML tag record
+HTML_NONE = (0, "")
+
class ToHtml(Tokenizer):
"""Core: HTML Document Writer
@@ -447,19 +454,46 @@ def getStyleSheet(self) -> list[str]:
def _formatText(self, text: str, tFmt: T_Formats) -> str:
"""Apply formatting tags to text."""
temp = text
- for pos, fmt, data in reversed(tFmt):
- html = ""
- if fmt == self.FMT_FNOTE:
+
+ # Build a list of all html tags that need to be inserted in the text.
+ # This is done in the forward direction, and a tag is only opened if it
+ # isn't already open, and only closed if it has previously been opened.
+ tags: list[tuple[int, str]] = []
+ state = dict.fromkeys(HTML_OPENER, False)
+ for pos, fmt, data in tFmt:
+ if m := HTML_OPENER.get(fmt):
+ if not state.get(fmt, True):
+ tags.append((pos, m[1]))
+ state[fmt] = True
+ elif m := HTML_CLOSER.get(fmt):
+ if state.get(m[0], False):
+ tags.append((pos, m[1]))
+ state[m[0]] = False
+ elif fmt == self.FMT_FNOTE:
if data in self._footnotes:
index = len(self._usedNotes) + 1
self._usedNotes[data] = index
- html = f"{index}"
+ tags.append((pos, f"{index}"))
else:
- html = "ERR"
- else:
- html = HTML5_TAGS.get(fmt, "")
- temp = f"{temp[:pos]}{html}{temp[pos:]}"
+ tags.append((pos, "ERR"))
+
+ # Check all format types and close any tag that is still open. This
+ # ensures that unclosed tags don't spill over to the next paragraph.
+ end = len(text)
+ for opener, active in state.items():
+ if active:
+ closer = HTML_OPENER.get(opener, HTML_NONE)[0]
+ tags.append((end, HTML_CLOSER.get(closer, HTML_NONE)[1]))
+
+ # Insert all tags at their correct position, starting from the back.
+ # The reverse order ensures that the positions are not shifted while we
+ # insert tags.
+ for pos, tag in reversed(tags):
+ temp = f"{temp[:pos]}{tag}{temp[pos:]}"
+
+ # Replace all line breaks with proper HTML break tags
temp = temp.replace("\n", "<br>")
+
return stripEscape(temp)
def _formatSynopsis(self, text: str, synopsis: bool) -> str:
diff --git a/tests/test_core/test_core_tohtml.py b/tests/test_core/test_core_tohtml.py
index 9e150295c..7974cef4d 100644
--- a/tests/test_core/test_core_tohtml.py
+++ b/tests/test_core/test_core_tohtml.py
@@ -275,12 +275,12 @@ def testCoreToHtml_ConvertParagraphs(mockGUI):
CONFIG.altDialogOpen = "::"
CONFIG.altDialogClose = "::"
html.setDialogueHighlight(True)
- html._text = "## Chapter\n\nThis text :: has alt dialogue :: in it.\n\n"
+ html._text = "## Chapter\n\nThis text ::has alt dialogue:: in it.\n\n"
html.tokenizeText()
html.doConvert()
assert html.result == (
"
This text :: has alt dialogue :: in it.
\n" + "This text ::has alt dialogue:: in it.
\n" ) # Footnotes @@ -308,6 +308,42 @@ def testCoreToHtml_ConvertParagraphs(mockGUI): ) +@pytest.mark.core +def testCoreToHtml_CloseTags(mockGUI): + """Test automatic closing of HTML tags for shortcodes.""" + project = NWProject() + html = ToHtml(project) + + html._isNovel = True + html._isFirst = True + + # Unclosed Shortcodes + html._text = "Text [b][i][s][u][m][sup][sub]text text text.\n" + html.tokenizeText() + html.doConvert() + assert html.result == ( + "Text "
+ "text text text.
Text "
+ "text text text.
Text text text text.
\n" + ) + + @pytest.mark.core def testCoreToHtml_ConvertDirect(mockGUI): """Test the converter directly using the ToHtml class."""