Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the text tokenizer #1885

Merged
9 commits merged on May 21, 2024
1 change: 1 addition & 0 deletions novelwriter/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class nwHeaders:

H_VALID = ("H0", "H1", "H2", "H3", "H4")
H_LEVEL = {"H0": 0, "H1": 1, "H2": 2, "H3": 3, "H4": 4}
H_SIZES = {0: 1.00, 1: 2.00, 2: 1.75, 3: 1.50, 4: 1.25}


class nwFiles:
Expand Down
2 changes: 1 addition & 1 deletion novelwriter/core/docbuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from novelwriter.core.project import NWProject
from novelwriter.core.tohtml import ToHtml
from novelwriter.core.tokenizer import Tokenizer
from novelwriter.core.tomd import ToMarkdown
from novelwriter.core.tomarkdown import ToMarkdown
from novelwriter.core.toodt import ToOdt
from novelwriter.enum import nwBuildFmt
from novelwriter.error import formatException, logException
Expand Down
32 changes: 8 additions & 24 deletions novelwriter/core/tohtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ def doConvert(self) -> None:
h3 = "h3"
h4 = "h4"

para = []
lines = []
pStyle = None
tHandle = self._handle

for tType, nHead, tText, tFormat, tStyle in self._tokens:
Expand Down Expand Up @@ -240,37 +238,27 @@ def doConvert(self) -> None:
aNm = ""

# Process Text Type
if tType == self.T_EMPTY:
if pStyle is None:
pStyle = ""
if len(para) > 1 and self._cssStyles:
pClass = " class='break'"
else:
pClass = ""
if len(para) > 0:
tTemp = "<br/>".join(para)
lines.append(f"<p{pClass+pStyle}>{tTemp.rstrip()}</p>\n")
para = []
pStyle = None
if tType == self.T_TEXT:
lines.append(f"<p{hStyle}>{self._formatText(tText, tFormat, hTags)}</p>\n")

elif tType == self.T_TITLE:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<h1 class='title'{hStyle}>{aNm}{tHead}</h1>\n")

elif tType == self.T_HEAD1:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h1}{h1Cl}{hStyle}>{aNm}{tHead}</{h1}>\n")

elif tType == self.T_HEAD2:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h2}{hStyle}>{aNm}{tHead}</{h2}>\n")

elif tType == self.T_HEAD3:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h3}{hStyle}>{aNm}{tHead}</{h3}>\n")

elif tType == self.T_HEAD4:
tHead = tText.replace(nwHeadFmt.BR, "<br/>")
tHead = tText.replace(nwHeadFmt.BR, "<br>")
lines.append(f"<{h4}{hStyle}>{aNm}{tHead}</{h4}>\n")

elif tType == self.T_SEP:
Expand All @@ -279,11 +267,6 @@ def doConvert(self) -> None:
elif tType == self.T_SKIP:
lines.append(f"<p class='skip'{hStyle}>&nbsp;</p>\n")

elif tType == self.T_TEXT:
if pStyle is None:
pStyle = hStyle
para.append(self._formatText(tText, tFormat, hTags).rstrip())

elif tType == self.T_SYNOPSIS and self._doSynopsis:
lines.append(self._formatSynopsis(self._formatText(tText, tFormat, hTags), True))

Expand Down Expand Up @@ -491,6 +474,7 @@ def _formatText(self, text: str, tFmt: T_Formats, tags: dict[int, str]) -> str:
else:
html = tags.get(fmt, "ERR")
temp = f"{temp[:pos]}{html}{temp[pos:]}"
temp = temp.replace("\n", "<br>")
return stripEscape(temp)

def _formatSynopsis(self, text: str, synopsis: bool) -> str:
Expand Down
Loading