Skip to content

Commit

Permalink
Improve the text tokenizer (#1885)
Browse files Browse the repository at this point in the history
  • Loading branch information
vkbo committed May 21, 2024
2 parents d2fca3b + 3fd62d0 commit e95d330
Show file tree
Hide file tree
Showing 15 changed files with 261 additions and 384 deletions.
1 change: 1 addition & 0 deletions novelwriter/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class nwHeaders:

H_VALID = ("H0", "H1", "H2", "H3", "H4")
H_LEVEL = {"H0": 0, "H1": 1, "H2": 2, "H3": 3, "H4": 4}
H_SIZES = {0: 1.00, 1: 2.00, 2: 1.75, 3: 1.50, 4: 1.25}


class nwFiles:
Expand Down
2 changes: 1 addition & 1 deletion novelwriter/core/docbuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from novelwriter.core.project import NWProject
from novelwriter.core.tohtml import ToHtml
from novelwriter.core.tokenizer import Tokenizer
from novelwriter.core.tomd import ToMarkdown
from novelwriter.core.tomarkdown import ToMarkdown
from novelwriter.core.toodt import ToOdt
from novelwriter.enum import nwBuildFmt
from novelwriter.error import formatException, logException
Expand Down
32 changes: 8 additions & 24 deletions novelwriter/core/tohtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ def doConvert(self) -> None:
h3 = "h3"
h4 = "h4"

para = []
lines = []
pStyle = None
tHandle = self._handle

for tType, nHead, tText, tFormat, tStyle in self._tokens:
Expand Down Expand Up @@ -240,37 +238,27 @@ def doConvert(self) -> None:
aNm = ""

# Process Text Type
if tType == self.T_EMPTY:
if pStyle is None:
pStyle = ""
if len(para) > 1 and self._cssStyles:
pClass = " class='break'"
else:
pClass = ""
if len(para) > 0:
tTemp = "<br/>".join(para)
lines.append(f"<p{pClass+pStyle}>{tTemp.rstrip()}</p>\n")
para = []
pStyle = None
if tType == self.T_TEXT:
lines.append(f"<p{hStyle}>{self._formatText(tText, tFormat, hTags)}</p>\n")

elif tType == self.T_TITLE:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<h1 class='title'{hStyle}>{aNm}{tHead}</h1>\n")

elif tType == self.T_HEAD1:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h1}{h1Cl}{hStyle}>{aNm}{tHead}</{h1}>\n")

elif tType == self.T_HEAD2:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h2}{hStyle}>{aNm}{tHead}</{h2}>\n")

elif tType == self.T_HEAD3:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h3}{hStyle}>{aNm}{tHead}</{h3}>\n")

elif tType == self.T_HEAD4:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h4}{hStyle}>{aNm}{tHead}</{h4}>\n")

elif tType == self.T_SEP:
Expand All @@ -279,11 +267,6 @@ def doConvert(self) -> None:
elif tType == self.T_SKIP:
lines.append(f"<p class='skip'{hStyle}>&nbsp;</p>\n")

elif tType == self.T_TEXT:
if pStyle is None:
pStyle = hStyle
para.append(self._formatText(tText, tFormat, hTags).rstrip())

elif tType == self.T_SYNOPSIS and self._doSynopsis:
lines.append(self._formatSynopsis(self._formatText(tText, tFormat, hTags), True))

Expand Down Expand Up @@ -491,6 +474,7 @@ def _formatText(self, text: str, tFmt: T_Formats, tags: dict[int, str]) -> str:
else:
html = tags.get(fmt, "ERR")
temp = f"{temp[:pos]}{html}{temp[pos:]}"
temp = temp.replace("\n", "<br>")
return stripEscape(temp)

def _formatSynopsis(self, text: str, synopsis: bool) -> str:
Expand Down
Loading

0 comments on commit e95d330

Please sign in to comment.