Skip to content

Commit

Permalink
perf: simplify captureWhile
Browse files Browse the repository at this point in the history
Notably the atStart parameter is no longer computed and passed to the
test. Those places that need it need to compute it themselves.

In fact most places that used it don't need it. The ``isNameStartChar`` checks
were mostly redundant as the necessary check on the first character of names was
done elsewhere.
  • Loading branch information
lddubeau committed Aug 15, 2018
1 parent 07345bf commit bb2085c
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 125 deletions.
73 changes: 29 additions & 44 deletions lib/saxes.js
Original file line number Diff line number Diff line change
Expand Up @@ -494,10 +494,6 @@ class SaxesParser {
*
* @param {string} c The character to test.
*
* @param {boolean} atStart Whether the character will be at the start of its
* buffer. If the buffer was empty before capturing this character, this is
* ``true``.
*
* @returns {boolean} ``true`` if the method should continue capturing text,
* ``false`` otherwise.
*/
Expand All @@ -524,28 +520,19 @@ class SaxesParser {
*/
captureWhile(chunkState, test, buffer) {
const { limit, chunk, i: start } = chunkState;
let skip;
let c;
let atStart = this[buffer].length === 0;
// eslint-disable-next-line no-constant-condition
while (true) {
if (chunkState.i >= limit) {
c = undefined;
skip = 0;
break;
}

c = this.getCode(chunkState);
if (!test(c, atStart)) {
skip = c <= 0xFFFF ? 1 : 2;
break;
while (chunkState.i < limit) {
const c = this.getCode(chunkState);
if (!test(c)) {
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start,
chunkState.i - (c <= 0xFFFF ? 1 : 2));
return c;
}
atStart = false;
}

// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start, chunkState.i - skip);
return c;
this[buffer] += chunk.substring(start);
return undefined;
}

/**
Expand Down Expand Up @@ -718,7 +705,7 @@ class SaxesParser {
this.piTarget = this.piBody = "";
break;
default: {
this.fail("unencoded <.");
this.fail("disallowed characer in tag name.");
// if there was some whitespace, then add that in.
const pad = (this.startTagPosition + 1 < this.position) ?
new Array(this.position - this.startTagPosition).join(" ") :
Expand Down Expand Up @@ -928,11 +915,14 @@ class SaxesParser {

/** @private */
sPI(chunkState) {
// We have to perform the isNameStartChar check here because we do not feed
// the first character in piTarget elsehwere.
let check = this.piTarget.length === 0 ? isNameStartChar : isNameChar;
const c = this.captureWhile(
chunkState,
(cx, first) => {
(cx) => {
if (cx !== QUESTION && !isS(cx)) {
if (!((first ? isNameStartChar : isNameChar)(cx) &&
if (!(check(cx) &&
// When namespaces are used, colons are not allowed in entity
// names.
// https://www.w3.org/XML/xml-names-19990114-errata.html
Expand All @@ -941,6 +931,7 @@ class SaxesParser {
this.fail("disallowed characer in processing instruction name.");
}

check = isNameStartChar;
return true;
}

Expand Down Expand Up @@ -1059,7 +1050,8 @@ class SaxesParser {
}
break;
case S_XML_DECL_VALUE:
c = this.captureWhile(chunkState, cx => cx !== QUESTION && cx !== this.q,
c = this.captureWhile(chunkState,
cx => cx !== QUESTION && cx !== this.q,
"xmlDeclValue");

// The question mark character is not valid inside any of the XML
Expand Down Expand Up @@ -1182,11 +1174,13 @@ class SaxesParser {

/** @private */
sOpenTag(chunkState) {
// We don't need to check with isNameStartChar here because the first
// character of tagName is fed elsewhere, and the check is done there.
const c = this.captureWhile(
chunkState,
(cx, first) => {
(cx) => {
if (cx !== GREATER && !isS(cx) && cx !== FORWARD_SLASH) {
if (!((first ? isNameStartChar : isNameChar)(cx))) {
if (!isNameChar(cx)) {
this.fail("disallowed characer in tag name.");
}

Expand Down Expand Up @@ -1264,11 +1258,13 @@ class SaxesParser {

/** @private */
sAttribName(chunkState) {
// We don't need to check with isNameStartChar here because the first
// character of attribute is fed elsewhere, and the check is done there.
const c = this.captureWhile(
chunkState,
(cx, first) => {
(cx) => {
if (cx !== EQUAL && !isS(cx) && cx !== GREATER) {
if (!((first ? isNameStartChar : isNameChar)(cx))) {
if (!isNameChar(cx)) {
this.fail("disallowed characer in attribute name.");
}

Expand Down Expand Up @@ -1418,20 +1414,9 @@ class SaxesParser {

/** @private */
sCloseTag(chunkState) {
const c = this.captureWhile(
chunkState,
(cx, first) => {
if (cx !== GREATER && !isS(cx)) {
if (!((first ? isNameStartChar : isNameChar)(cx))) {
this.fail("disallowed characer in tag name.");
}

return true;
}

return false;
},
"tagName");
const c = this.captureWhile(chunkState,
cx => cx !== GREATER && !isS(cx),
"tagName");
if (c === GREATER) {
this.closeTag();
}
Expand Down
166 changes: 85 additions & 81 deletions test/opentagstart.js
Original file line number Diff line number Diff line change
@@ -1,93 +1,97 @@
"use strict";

require(".").test({
name: "openstarttag (xmlns true)",
xml: "<root length='12345'></root>",
expect: [
[
"opentagstart",
{
name: "root",
ns: {},
attributes: {},
},
],
[
"opentag",
{
name: "root",
prefix: "",
local: "root",
uri: "",
attributes: {
length: {
name: "length",
value: "12345",
prefix: "",
local: "length",
uri: "",
const { test } = require(".");

describe("openstarttag", () => {
test({
name: "good name, xmlns true",
xml: "<root length='12345'></root>",
expect: [
[
"opentagstart",
{
name: "root",
ns: {},
attributes: {},
},
],
[
"opentag",
{
name: "root",
prefix: "",
local: "root",
uri: "",
attributes: {
length: {
name: "length",
value: "12345",
prefix: "",
local: "length",
uri: "",
},
},
ns: {},
isSelfClosing: false,
},
ns: {},
isSelfClosing: false,
},
],
[
"closetag",
{
name: "root",
prefix: "",
local: "root",
uri: "",
attributes: {
length: {
name: "length",
value: "12345",
prefix: "",
local: "length",
uri: "",
],
[
"closetag",
{
name: "root",
prefix: "",
local: "root",
uri: "",
attributes: {
length: {
name: "length",
value: "12345",
prefix: "",
local: "length",
uri: "",
},
},
ns: {},
isSelfClosing: false,
},
ns: {},
isSelfClosing: false,
},
],
],
],
opt: {
xmlns: true,
},
});
opt: {
xmlns: true,
},
});

require(".").test({
name: "openstarttag (xmlns false)",
xml: "<root length='12345'></root>",
expect: [
[
"opentagstart",
{
name: "root",
attributes: {},
},
],
[
"opentag",
{
name: "root",
attributes: {
length: "12345",
test({
name: "good name, xmlns false",
xml: "<root length='12345'></root>",
expect: [
[
"opentagstart",
{
name: "root",
attributes: {},
},
isSelfClosing: false,
},
],
[
"closetag",
{
name: "root",
attributes: {
length: "12345",
],
[
"opentag",
{
name: "root",
attributes: {
length: "12345",
},
isSelfClosing: false,
},
],
[
"closetag",
{
name: "root",
attributes: {
length: "12345",
},
isSelfClosing: false,
},
isSelfClosing: false,
},
],
],
],
});
});

0 comments on commit bb2085c

Please sign in to comment.