Skip to content

Commit

Permalink
refactor: Fix how indices are computed, add attrib indices (#929)
Browse files Browse the repository at this point in the history
The previous approach had several shortcomings; e.g., _special_ comments like `</12>` or CDATA in HTML would have misreported indices.

As a new feature, attributes will now have indices set appropriately.
  • Loading branch information
fb55 authored Aug 27, 2021
1 parent 4e25252 commit 28c162b
Show file tree
Hide file tree
Showing 44 changed files with 926 additions and 69 deletions.
49 changes: 18 additions & 31 deletions src/Parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,30 +79,6 @@ describe("API", () => {
expect(text).toBe("0&#xn");
});

test("should update the position", () => {
const p = new Parser();

p.write("foo");

expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(2);

p.write("<select>");

expect(p.startIndex).toBe(3);
expect(p.endIndex).toBe(10);

p.write("<select>");

expect(p.startIndex).toBe(11);
expect(p.endIndex).toBe(18);

p.parseChunk("</select>");

expect(p.startIndex).toBe(19);
expect(p.endIndex).toBe(27);
});

test("should not have the start index be greater than the end index", () => {
const onopentag = jest.fn();
const onclosetag = jest.fn();
Expand Down Expand Up @@ -134,22 +110,33 @@ describe("API", () => {
});

test("should update the position when a single tag is spread across multiple chunks", () => {
const p = new Parser();
let called = false;
const p = new Parser({
onopentag() {
called = true;
expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(12);
},
});

p.write("<div ");
p.write("foo=bar>");

expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(12);
expect(called).toBe(true);
});

test("should have the correct position for implied opening tags", () => {
const p = new Parser();
let called = false;
const p = new Parser({
onopentag() {
called = true;
expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(3);
},
});

p.write("</p>");

expect(p.startIndex).toBe(0);
expect(p.endIndex).toBe(3);
expect(called).toBe(true);
});

test("should parse <__proto__> (#387)", () => {
Expand Down
91 changes: 70 additions & 21 deletions src/Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ export class Parser {
public startIndex = 0;
/** The end index of the last event. */
public endIndex = 0;
/**
* Store the start index of the current open tag,
* so we can update the start index for attributes.
*/
private openTagStart = 0;

private tagname = "";
private attribname = "";
Expand All @@ -212,7 +217,6 @@ export class Parser {
cbs?: Partial<Handler> | null,
private readonly options: ParserOptions = {}
) {
this.options = options;
this.cbs = cbs ?? {};
this.lowerCaseTagNames = options.lowerCaseTags ?? !options.xmlMode;
this.lowerCaseAttributeNames =
Expand All @@ -224,24 +228,23 @@ export class Parser {
this.cbs.onparserinit?.(this);
}

private updatePosition(offset: number) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
this.endIndex = this.tokenizer.getAbsoluteIndex();
}

// Tokenizer event handlers

/** @internal */
ontext(data: string): void {
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
const idx = this.tokenizer.getAbsoluteIndex();
this.endIndex = idx;
this.cbs.ontext?.(data);
this.startIndex = idx;
}

/**
 * Tells whether `name` is handled as a void element.
 *
 * Nothing is a void element while `xmlMode` is enabled; otherwise the
 * answer is whether `voidElements` contains the name.
 *
 * @param name Tag name to look up.
 * @returns `true` if the tag is treated as void, `false` otherwise.
 */
protected isVoidElement(name: string): boolean {
    if (this.options.xmlMode) {
        return false;
    }
    return voidElements.has(name);
}

/** @internal */
onopentagname(name: string): void {
this.updatePosition(1);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.lowerCaseTagNames) {
name = name.toLowerCase();
Expand All @@ -251,6 +254,7 @@ export class Parser {
}

private emitOpenTag(name: string) {
this.openTagStart = this.startIndex;
this.tagname = name;

const impliesClose =
Expand All @@ -277,7 +281,9 @@ export class Parser {
if (this.cbs.onopentag) this.attribs = {};
}

/** @internal */
onopentagend(): void {
this.startIndex = this.openTagStart;
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.attribs) {
Expand All @@ -287,11 +293,16 @@ export class Parser {
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
this.cbs.onclosetag(this.tagname);
}

this.tagname = "";
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
onclosetag(name: string): void {
this.updatePosition(2);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
Expand Down Expand Up @@ -319,8 +330,12 @@ export class Parser {
this.emitOpenTag(name);
this.closeCurrentTag();
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
onselfclosingtag(): void {
if (
this.options.xmlMode ||
Expand All @@ -329,35 +344,44 @@ export class Parser {
) {
this.closeCurrentTag();
} else {
// Ignore the fact that the tag is self-closing.
this.onopentagend();
}
}

/**
 * Finishes the current open tag and, if that tag ended up on top of the
 * stack, immediately closes it again.
 */
private closeCurrentTag() {
    // Read the name before calling `onopentagend` (which presumably resets `this.tagname`).
    const closedName = this.tagname;
    this.onopentagend();

    // Self-closing tags will be on the top of the stack
    const topOfStack = this.stack[this.stack.length - 1];
    if (topOfStack !== closedName) {
        return;
    }

    // Reset the start index to where the open tag began
    this.startIndex = this.openTagStart;

    this.cbs.onclosetag?.(closedName);
    this.stack.pop();
}

/**
 * Records the name of the attribute that is currently being parsed.
 *
 * The start index is moved to the tokenizer's absolute section start, and
 * the name is lower-cased when `lowerCaseAttributeNames` is set.
 *
 * @param name Attribute name as received.
 * @internal
 */
onattribname(name: string): void {
    this.startIndex = this.tokenizer.getAbsoluteSectionStart();

    this.attribname = this.lowerCaseAttributeNames
        ? name.toLowerCase()
        : name;
}

/**
 * Appends a piece of attribute value to the value collected so far.
 *
 * @param value Text to add to the end of the pending attribute value.
 * @internal
 */
onattribdata(value: string): void {
    this.attribvalue = this.attribvalue + value;
}

/** @internal */
onattribend(quote: string | undefined | null): void {
this.endIndex = this.tokenizer.getAbsoluteIndex();

this.cbs.onattribute?.(this.attribname, this.attribvalue, quote);
if (
this.attribs &&
Expand All @@ -380,47 +404,70 @@ export class Parser {
return name;
}

/** @internal */
ondeclaration(value: string): void {
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`!${name}`, `!${value}`);
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
onprocessinginstruction(value: string): void {
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`?${name}`, `?${value}`);
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
oncomment(value: string): void {
this.updatePosition(4);
this.endIndex = this.tokenizer.getAbsoluteIndex();

this.cbs.oncomment?.(value);
this.cbs.oncommentend?.();

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/** @internal */
oncdata(value: string): void {
this.updatePosition(9);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.options.xmlMode || this.options.recognizeCDATA) {
this.cbs.oncdatastart?.();
this.cbs.ontext?.(value);
this.cbs.oncdataend?.();
} else {
this.oncomment(`[CDATA[${value}]]`);
this.cbs.oncomment?.(`[CDATA[${value}]]`);
this.cbs.oncommentend?.();
}

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
}

/**
 * Passes an error on to the `onerror` callback, if one is registered.
 *
 * @param err The error to report.
 * @internal
 */
onerror(err: Error): void {
    // Call through `this.cbs` so the callback keeps the handler object as its receiver.
    if (this.cbs.onerror != null) {
        this.cbs.onerror(err);
    }
}

/** @internal */
onend(): void {
if (this.cbs.onclosetag) {
// Set start- and end indices for remaining tags
this.startIndex = this.endIndex = this.tokenizer.getAbsoluteIndex();
// Set the end index for all remaining tags
this.endIndex = this.startIndex;
for (
let i = this.stack.length;
i > 0;
Expand All @@ -440,6 +487,8 @@ export class Parser {
this.attribname = "";
this.attribs = null;
this.stack = [];
this.startIndex = 0;
this.endIndex = 0;
this.cbs.onparserinit?.(this);
}

Expand Down
4 changes: 4 additions & 0 deletions src/__fixtures__/Events/01-simple.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
},
{
"event": "attribute",
"startIndex": 4,
"endIndex": 14,
"data": ["class", "test", null]
},
{
Expand All @@ -25,6 +27,8 @@
},
{
"event": "text",
"startIndex": 15,
"endIndex": 19,
"data": ["adsf"]
},
{
Expand Down
4 changes: 4 additions & 0 deletions src/__fixtures__/Events/02-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
},
{
"event": "attribute",
"startIndex": 11,
"endIndex": 30,
"data": ["type", "text/template", "\""]
},
{
Expand All @@ -37,6 +39,8 @@
},
{
"event": "text",
"startIndex": 32,
"endIndex": 49,
"data": ["<h1>Heading1</h1>"]
},
{
Expand Down
4 changes: 4 additions & 0 deletions src/__fixtures__/Events/03-lowercase_tags.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
},
{
"event": "attribute",
"startIndex": 4,
"endIndex": 14,
"data": ["class", "test", null]
},
{
Expand All @@ -30,6 +32,8 @@
},
{
"event": "text",
"startIndex": 15,
"endIndex": 19,
"data": ["adsf"]
},
{
Expand Down
2 changes: 2 additions & 0 deletions src/__fixtures__/Events/04-cdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
},
{
"event": "text",
"startIndex": 5,
"endIndex": 41,
"data": [" asdf ><asdf></adsf><> fo"]
},
{
Expand Down
2 changes: 2 additions & 0 deletions src/__fixtures__/Events/05-cdata-special.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
},
{
"event": "text",
"startIndex": 8,
"endIndex": 53,
"data": ["/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"]
},
{
Expand Down
2 changes: 2 additions & 0 deletions src/__fixtures__/Events/06-leading-lt.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
"expected": [
{
"event": "text",
"startIndex": 0,
"endIndex": 3,
"data": [">a>"]
}
]
Expand Down
Loading

0 comments on commit 28c162b

Please sign in to comment.