Skip to content

Commit

Permalink
Properly parse self-closing stop-nodes
Browse files Browse the repository at this point in the history
Work around a bug in fast-xml-parser: NaturalIntelligence/fast-xml-parser#654

Also use the `parseTagValue` flag to disable parsing node values into numbers and booleans, which removes the need for the other workarounds.

Changelog: fixed
  • Loading branch information
smoores-dev committed May 13, 2024
1 parent a6cbd65 commit b442190
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .yarn/versions/8340cb85.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
releases:
"@storyteller/web": patch
14 changes: 13 additions & 1 deletion web/__tests__/epub.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
import { join } from "node:path"
import { describe, it } from "node:test"
import { Epub, getBody, textContent } from "../epub"
import { Epub, ParsedXml, getBody, textContent } from "../epub"
import assert from "node:assert"
import { stat } from "node:fs/promises"

// Regression test for self-closing stop nodes
// (NaturalIntelligence/fast-xml-parser#654): parsing a self-closing
// <script/> element and building it again must round-trip unchanged.
void describe("xhtml parsing", () => {
  void it("can handle self-closing stop nodes", () => {
    const xmlString = `<script src="script.js"/>`
    const parsed = Epub.xhtmlParser.parse(xmlString) as ParsedXml
    const built = Epub.xhtmlBuilder.build(parsed) as string

    // The rebuilt markup must equal the input exactly: still self-closing,
    // and with no extra attribute derived from the trailing "/".
    assert.strictEqual(built, xmlString)
  })
})

void describe("Epub", () => {
void it("can read from an archived .epub file", async () => {
const filepath = join("__fixtures__", "moby-dick.epub")
Expand Down
18 changes: 14 additions & 4 deletions web/epub.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,20 @@ export class Epub {
htmlEntities: true,
trimValues: false,
stopNodes: ["*.pre", "*.script"],
numberParseOptions: {
hex: false,
leadingZeros: false,
skipLike: /.*/,
parseTagValue: false,
updateTag(_tagName, _jPath, attrs) {
  // Self-closing stop nodes parsed with ignoreAttributes: false and
  // allowBooleanAttributes: true sometimes come back with a bogus
  // attribute named "/". No real attribute ever has that name, so it is
  // stripped here.
  //
  // The library's types claim attrs is always defined, but it is
  // undefined when the tag has no attributes, hence the falsy guard.
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  if (!attrs || !("@_/" in attrs)) {
    return true
  }

  delete attrs["@_/"]
  return true
},
})

Expand Down
4 changes: 2 additions & 2 deletions web/synchronize/__tests__/getXhtmlSentences.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { describe, it } from "node:test"
import { getXHtmlSentences } from "../getXhtmlSentences"
import { strict as assert } from "node:assert/strict"

void describe.only("getXhtmlSentences", () => {
void describe("getXhtmlSentences", () => {
void it("gets sentences from a text node", () => {
const input = [
{
Expand Down Expand Up @@ -54,7 +54,7 @@ void describe.only("getXhtmlSentences", () => {
])
})

void it.only("gets sentences from nested elements", () => {
void it("gets sentences from nested elements", () => {
const input: ParsedXml = [
{
p: [
Expand Down
2 changes: 1 addition & 1 deletion web/synchronize/tagSentences.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ function tagSentencesInXml(
const remainingSentence = sentences[currentSentenceIndex]!.slice(
currentSentenceProgress,
)
const text = currentNode["#text"].toString()
const text = currentNode["#text"]
const remainingNodeText = text.slice(currentNodeProgress)
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const index = remainingNodeText.indexOf(remainingSentence[0]!)
Expand Down

0 comments on commit b442190

Please sign in to comment.