Skip to content

Commit

Permalink
Merge pull request #68 from msgpack/text_encoder
Browse files Browse the repository at this point in the history
use TextEncoder to encode string if available
  • Loading branch information
gfx authored Jul 8, 2019
2 parents 271f2ca + 514a33f commit e4cb3ce
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 14 deletions.
4 changes: 2 additions & 2 deletions benchmark/decode-string.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* eslint-disable no-console */
import { utf8Encode, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
import { utf8DecodeWasm } from "../src/wasmFunctions";

// @ts-ignore
Expand All @@ -13,7 +13,7 @@ for (const baseStr of ["A", "あ", "🌏"]) {
for (const str of dataSet) {
const byteLength = utf8Count(str);
const bytes = new Uint8Array(new ArrayBuffer(byteLength));
utf8Encode(str, bytes, 0);
utf8EncodeJs(str, bytes, 0);

console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);

Expand Down
33 changes: 33 additions & 0 deletions benchmark/encode-string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/* eslint-disable no-console */
import { utf8EncodeJs, utf8Count, utf8EncodeTE } from "../src/utils/utf8";

// @ts-ignore
import Benchmark from "benchmark";

// Benchmark script: compares the pure-JS UTF-8 encoder (utf8EncodeJs) with the
// TextEncoder-backed one (utf8EncodeTE) on strings made by repeating a single
// character ("A", "あ", "🌏") at several lengths.
for (const baseStr of ["A", "あ", "🌏"]) {
  const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
    return baseStr.repeat(n);
  });

  for (const str of dataSet) {
    // Size the output buffer exactly once per input; both encoders write into
    // the same buffer starting at offset 0.
    const byteLength = utf8Count(str);
    const buffer = new Uint8Array(byteLength);

    console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);

    const suite = new Benchmark.Suite();

    suite.add("utf8EncodeJs", () => {
      utf8EncodeJs(str, buffer, 0);
    });

    // The label must name the function being measured; it was previously
    // reported as "utf8DecodeTE" although this case runs the encoder.
    suite.add("utf8EncodeTE", () => {
      utf8EncodeTE(str, buffer, 0);
    });

    // Print one result line per finished benchmark case.
    suite.on("cycle", (event: any) => {
      console.log(String(event.target));
    });

    suite.run();
  }
}
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
"prepublishOnly": "run-p 'test:dist:*' && npm run test:browser",
"clean": "rimraf build dist dist.*",
"test": "mocha 'test/**/*.test.ts'",
"test:purejs": "TEXT_DECODER=never MSGPACK_WASM=never mocha 'test/**/*.test.ts'",
"test:wasm": "npm run asbuild:production && TEXT_DECODER=never MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
"test:td": "TEXT_DECODER=force mocha 'test/**/*.test.ts'",
"test:purejs": "TEXT_ENCODING=never MSGPACK_WASM=never mocha 'test/**/*.test.ts'",
"test:wasm": "npm run asbuild:production && TEXT_ENCODING=never MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
"test:te": "TEXT_ENCODING=force mocha 'test/**/*.test.ts'",
"test:dist:purejs": "TS_NODE_PROJECT=tsconfig.test-dist-es5-purejs.json npm run test:purejs -- --reporter=dot",
"test:dist:wasm": "TS_NODE_PROJECT=tsconfig.test-dist-es5-wasm.json npm run test:wasm -- --reporter=dot",
"test:cover": "npm run cover:clean && npm-run-all 'test:cover:*' && npm run cover:report",
"test:cover:purejs": "npx nyc --no-clean npm run test:purejs",
"test:cover:wasm": "npx nyc --no-clean npm run test:wasm",
"test:cover:td": "npx nyc --no-clean npm run test:td",
"test:cover:te": "npx nyc --no-clean npm run test:te",
"cover:clean": "rimraf .nyc_output coverage/",
"cover:report": "nyc report --reporter=text-summary --reporter=html --reporter=json",
"test:browser": "karma start --single-run",
Expand Down
4 changes: 2 additions & 2 deletions src/Decoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { prettyByte } from "./utils/prettyByte";
import { ExtensionCodec } from "./ExtensionCodec";
import { getInt64, getUint64 } from "./utils/int";
import { utf8DecodeJs, TEXT_DECODER_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
import { utf8DecodeJs, TEXT_ENCODING_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
import { createDataView, ensureUint8Array } from "./utils/typedArrays";
import { WASM_AVAILABLE, WASM_STR_THRESHOLD, utf8DecodeWasm } from "./wasmFunctions";

Expand Down Expand Up @@ -482,7 +482,7 @@ export class Decoder {

const offset = this.pos + headerOffset;
let object: string;
if (TEXT_DECODER_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
if (TEXT_ENCODING_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
object = utf8DecodeTD(this.bytes, offset, byteLength);
} else if (WASM_AVAILABLE && byteLength > WASM_STR_THRESHOLD) {
object = utf8DecodeWasm(this.bytes, offset, byteLength);
Expand Down
12 changes: 9 additions & 3 deletions src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { utf8Encode, utf8Count } from "./utils/utf8";
import { utf8EncodeJs, utf8Count, TEXT_ENCODING_AVAILABLE, TEXT_ENCODER_THRESHOLD, utf8EncodeTE } from "./utils/utf8";
import { ExtensionCodec } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array } from "./utils/typedArrays";
Expand Down Expand Up @@ -148,7 +148,13 @@ export class Encoder {
const maxHeaderSize = 1 + 4;
const strLength = object.length;

if (WASM_AVAILABLE && strLength > WASM_STR_THRESHOLD) {
if (TEXT_ENCODING_AVAILABLE && strLength > TEXT_ENCODER_THRESHOLD) {
const byteLength = utf8Count(object);
this.ensureBufferSizeToWrite(maxHeaderSize + byteLength);
this.writeStringHeader(byteLength);
utf8EncodeTE(object, this.bytes, this.pos);
this.pos += byteLength;
} else if (WASM_AVAILABLE && strLength > WASM_STR_THRESHOLD) {
// ensure max possible size
const maxSize = maxHeaderSize + strLength * 4;
this.ensureBufferSizeToWrite(maxSize);
Expand All @@ -161,7 +167,7 @@ export class Encoder {
const byteLength = utf8Count(object);
this.ensureBufferSizeToWrite(maxHeaderSize + byteLength);
this.writeStringHeader(byteLength);
utf8Encode(object, this.bytes, this.pos);
utf8EncodeJs(object, this.bytes, this.pos);
this.pos += byteLength;
}
}
Expand Down
24 changes: 21 additions & 3 deletions src/utils/utf8.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
/**
 * Whether the TextEncoder/TextDecoder code paths may be used. False when the
 * TEXT_ENCODING environment variable is "never", or when either the
 * TextEncoder or the TextDecoder global is missing from the runtime.
 */
export const TEXT_ENCODING_AVAILABLE = !(
  process.env.TEXT_ENCODING === "never" ||
  typeof TextEncoder === "undefined" ||
  typeof TextDecoder === "undefined"
);

export function utf8Count(str: string): number {
const strLength = str.length;

Expand Down Expand Up @@ -38,7 +41,7 @@ export function utf8Count(str: string): number {
return byteLength;
}

export function utf8Encode(str: string, output: Uint8Array, outputOffset: number): void {
export function utf8EncodeJs(str: string, output: Uint8Array, outputOffset: number): void {
const strLength = str.length;
let offset = outputOffset;
let pos = 0;
Expand Down Expand Up @@ -81,6 +84,22 @@ export function utf8Encode(str: string, output: Uint8Array, outputOffset: number
}
}

// Module-level TextEncoder instance used by the utf8EncodeTE* helpers below;
// stays undefined when TEXT_ENCODING_AVAILABLE is false.
const sharedTextEncoder = !TEXT_ENCODING_AVAILABLE ? undefined : new TextEncoder();

// String-length cutoff for choosing the TextEncoder path. It is 200 by
// default and 0 when the TEXT_ENCODING environment variable is "force".
export const TEXT_ENCODER_THRESHOLD = process.env.TEXT_ENCODING === "force" ? 0 : 200;

/**
 * Encodes `str` as UTF-8 with `TextEncoder.encode()` and copies the resulting
 * bytes into `output`, starting at `outputOffset`.
 *
 * @param str - string to encode
 * @param output - destination byte array; must have room for the encoded bytes
 * @param outputOffset - index in `output` at which the first byte is written
 */
function utf8EncodeTEencode(str: string, output: Uint8Array, outputOffset: number): void {
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
  const encodedBytes = sharedTextEncoder!.encode(str);
  output.set(encodedBytes, outputOffset);
}

/**
 * Encodes `str` as UTF-8 with `TextEncoder.encodeInto()`, writing into the
 * part of `output` that begins at `outputOffset`.
 *
 * @param str - string to encode
 * @param output - destination byte array
 * @param outputOffset - index in `output` at which the first byte is written
 */
function utf8EncodeTEencodeInto(str: string, output: Uint8Array, outputOffset: number): void {
  // subarray() yields a view over the same underlying buffer, so bytes written
  // through it show up in `output` itself.
  const destination = output.subarray(outputOffset);
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
  sharedTextEncoder!.encodeInto(str, destination);
}

/**
 * TextEncoder-backed UTF-8 encoder, selected once when the module loads:
 * the `encodeInto()` variant when the shared encoder exists and provides
 * `encodeInto`, otherwise the `encode()` + copy variant.
 */
export const utf8EncodeTE =
  !sharedTextEncoder || !sharedTextEncoder.encodeInto ? utf8EncodeTEencode : utf8EncodeTEencodeInto;

const CHUNK_SIZE = 0x10_000;

export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
Expand Down Expand Up @@ -132,8 +151,7 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
return result;
}

const sharedTextDecoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : null;
export const TEXT_DECODER_AVAILABLE = process.env.TEXT_DECODER !== "never" && !!sharedTextDecoder;
// Module-level TextDecoder instance; null when TEXT_ENCODING_AVAILABLE is false.
// NOTE(review): presumably consumed by utf8DecodeTD, whose body is not shown here.
const sharedTextDecoder = !TEXT_ENCODING_AVAILABLE ? null : new TextDecoder();
/**
 * Byte-length cutoff for choosing the TextDecoder path. It is 200 by default
 * and 0 when forced through the environment.
 *
 * The npm scripts drive this with `TEXT_ENCODING=force`, the same variable
 * that controls TEXT_ENCODING_AVAILABLE and TEXT_ENCODER_THRESHOLD, so that
 * variable must be honoured here. The legacy `TEXT_DECODER=force` spelling
 * keeps working for existing setups.
 */
export const TEXT_DECODER_THRESHOLD =
  process.env.TEXT_ENCODING !== "force" && process.env.TEXT_DECODER !== "force" ? 200 : 0;

export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
Expand Down

0 comments on commit e4cb3ce

Please sign in to comment.