Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement some functions in AssemblyScript/WebAssembly #26

Merged
merged 28 commits into from
May 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f2779b6
initial commit after `asinit .`
gfx May 13, 2019
80a1bd4
implement utf8Encode in AssemblyScript (but slow)
gfx May 14, 2019
be528f3
asbuild -O3; add benchmark; USE_WASM=true
gfx May 15, 2019
d9bf5af
fix misuse of load<T>()
gfx May 15, 2019
a4903ec
use String.fromCharCode.apply() in WASM ver.
gfx May 15, 2019
6c55452
re-structured wasm modules
gfx May 16, 2019
6066940
move use of utf8DecodeWasm to utf8.ts
gfx May 16, 2019
8a663c4
use memory allocator in wasm functions
gfx May 16, 2019
01fd626
refactor dist structure
gfx May 16, 2019
7694547
assumes process.env is always available
gfx May 16, 2019
e36f068
implement utf8CountWasm()
gfx May 16, 2019
c107406
implement the whole string encoder in wasm
gfx May 18, 2019
a1dd4f4
tiny optimization for utf8 encode/count
gfx May 23, 2019
66f2a78
simulate wasm + esm interface in pack-wasm.ts
gfx May 23, 2019
0d6b2f4
benchmark tweaks
gfx May 23, 2019
1525610
tweaks for WASM performance
gfx May 23, 2019
ede703d
remove dead code
gfx May 23, 2019
f3bca07
cleanup comments
gfx May 23, 2019
aaf70e2
tweaks; @inline is redundant in -O3
gfx May 23, 2019
8be8c5b
do not include wasm in the default bundle js
gfx May 23, 2019
78ceac5
Merge remote-tracking branch 'origin/master' into assemblyscript
gfx May 23, 2019
9a3c7bf
fix large string decode issues
gfx May 23, 2019
12fecca
fix karma config because webpack.config.js now returns array of configs
gfx May 23, 2019
c3c62bd
add test for broken code unit (e.g. \xff)
gfx May 23, 2019
5e631cc
do not drop lone surrogate
gfx May 23, 2019
4e058db
coverage report for both test:purejs and test:wasm
gfx May 24, 2019
ebe391a
set --forceConsistentCasingInFileNames
gfx May 25, 2019
b8025a5
test: make mocha.timeout longer (5 sec. to 10 sec.)
gfx May 25, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ module.exports = {
"no-var": "warn",
"valid-typeof": "warn", // "bigint" is not yet supported
"no-return-await": "warn",
"prefer-const": "warn",
// "prefer-const": "warn", // TODO: AssemblyScript has different semantics.
"guard-for-in": "warn",
"curly": "warn",
"no-param-reassign": "warn",

"@typescript-eslint/no-unused-vars":"warn",
"@typescript-eslint/array-type": ["error", "generic"],
Expand All @@ -34,7 +35,6 @@ module.exports = {
"@typescript-eslint/prefer-includes": "warn",
"@typescript-eslint/prefer-string-starts-ends-with": "warn",
"@typescript-eslint/no-use-before-define": "warn",
"@typescript-eslint/restrict-plus-operands": "error",
"@typescript-eslint/await-thenable": "error",
"@typescript-eslint/no-for-in-array": "error",

Expand Down
2 changes: 1 addition & 1 deletion .nycrc.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"include": ["src/**/*.ts"],
"extension": [".ts"],
"reporter": ["text-summary", "html", "lcov"],
"reporter": [],
"sourceMap": true,
"instrument": true
}
81 changes: 81 additions & 0 deletions assembly/be.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// load/store values in big-endian

// Reads the 32-bit float stored in big-endian byte order at `byteOffset`.
// The raw u32 is loaded, its bytes are reversed, and the resulting bit
// pattern is reinterpreted as an f32.
export function loadFloat32BE(byteOffset: usize): f32 {
  return reinterpret<f32>(bswap<u32>(load<u32>(byteOffset)));
}

// Misspelled original name ("Fload"), kept so existing callers keep working.
// Prefer loadFloat32BE, whose spelling matches loadFloat64BE / storeFloat32BE.
export function loadFload32BE(byteOffset: usize): f32 {
  return loadFloat32BE(byteOffset);
}

// Reads the 64-bit float stored in big-endian byte order at `byteOffset`.
export function loadFloat64BE(byteOffset: usize): f64 {
  let raw = load<u64>(byteOffset); // the eight bytes as a plain u64
  let swapped = bswap<u64>(raw); // reverse the byte order
  return reinterpret<f64>(swapped); // same bits, viewed as f64
}

// Reads the signed byte at `byteOffset`. A single byte has no byte order,
// so no swap is performed.
export function loadInt8BE(byteOffset: usize): i8 {
  let value = load<i8>(byteOffset);
  return value;
}

// Reads the signed 16-bit integer stored in big-endian byte order at `byteOffset`.
export function loadInt16BE(byteOffset: usize): i16 {
  let raw = load<i16>(byteOffset);
  return bswap<i16>(raw);
}

// Reads the signed 32-bit integer stored in big-endian byte order at `byteOffset`.
export function loadInt32BE(byteOffset: usize): i32 {
  let raw = load<i32>(byteOffset);
  return bswap<i32>(raw);
}

// Reads the signed 64-bit integer stored in big-endian byte order at `byteOffset`.
export function loadInt64BE(byteOffset: usize): i64 {
  let raw = load<i64>(byteOffset);
  return bswap<i64>(raw);
}

// Reads the unsigned byte at `byteOffset`. A single byte has no byte order,
// so no swap is performed.
export function loadUint8BE(byteOffset: usize): u8 {
  let value = load<u8>(byteOffset);
  return value;
}

// Reads the unsigned 16-bit integer stored in big-endian byte order at `byteOffset`.
export function loadUint16BE(byteOffset: usize): u16 {
  let raw = load<u16>(byteOffset);
  return bswap<u16>(raw);
}

// Reads the unsigned 32-bit integer stored in big-endian byte order at `byteOffset`.
export function loadUint32BE(byteOffset: usize): u32 {
  let raw = load<u32>(byteOffset);
  return bswap<u32>(raw);
}

// Reads the unsigned 64-bit integer stored in big-endian byte order at `byteOffset`.
export function loadUint64BE(byteOffset: usize): u64 {
  let raw = load<u64>(byteOffset);
  return bswap<u64>(raw);
}

// Writes `value` as a 32-bit float in big-endian byte order at `byteOffset`.
export function storeFloat32BE(byteOffset: usize, value: f32): void {
  let bits = reinterpret<u32>(value); // the float's bit pattern as u32
  let swapped = bswap<u32>(bits); // reverse the byte order
  store<u32>(byteOffset, swapped);
}

// Writes `value` as a 64-bit float in big-endian byte order at `byteOffset`.
export function storeFloat64BE(byteOffset: usize, value: f64): void {
  let bits = reinterpret<u64>(value); // the double's bit pattern as u64
  let swapped = bswap<u64>(bits); // reverse the byte order
  store<u64>(byteOffset, swapped);
}

// Writes the signed byte `value` at `byteOffset`. A single byte has no byte
// order, so no swap is performed.
export function storeInt8BE(byteOffset: usize, value: i8): void {
  let byte: i8 = value;
  store<i8>(byteOffset, byte);
}

// Writes `value` as a signed 16-bit integer in big-endian byte order at `byteOffset`.
export function storeInt16BE(byteOffset: usize, value: i16): void {
  let swapped = bswap<i16>(value);
  store<i16>(byteOffset, swapped);
}

// Writes `value` as a signed 32-bit integer in big-endian byte order at `byteOffset`.
export function storeInt32BE(byteOffset: usize, value: i32): void {
  let swapped = bswap<i32>(value);
  store<i32>(byteOffset, swapped);
}

// Writes `value` as a signed 64-bit integer in big-endian byte order at `byteOffset`.
export function storeInt64BE(byteOffset: usize, value: i64): void {
  let swapped = bswap<i64>(value);
  store<i64>(byteOffset, swapped);
}

// Writes the unsigned byte `value` at `byteOffset`. A single byte has no byte
// order, so no swap is performed.
export function storeUint8BE(byteOffset: usize, value: u8): void {
  let byte: u8 = value;
  store<u8>(byteOffset, byte);
}

// Writes `value` as an unsigned 16-bit integer in big-endian byte order at `byteOffset`.
export function storeUint16BE(byteOffset: usize, value: u16): void {
  let swapped = bswap<u16>(value);
  store<u16>(byteOffset, swapped);
}

// Writes `value` as an unsigned 32-bit integer in big-endian byte order at `byteOffset`.
export function storeUint32BE(byteOffset: usize, value: u32): void {
  let swapped = bswap<u32>(value);
  store<u32>(byteOffset, swapped);
}

// Writes `value` as an unsigned 64-bit integer in big-endian byte order at `byteOffset`.
export function storeUint64BE(byteOffset: usize, value: u64): void {
  let swapped = bswap<u64>(value);
  store<u64>(byteOffset, swapped);
}
3 changes: 3 additions & 0 deletions assembly/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array";
export { utf8EncodeUint16Array } from "./utf8EncodeUint16Array";
export { malloc, free } from "./memory";
9 changes: 9 additions & 0 deletions assembly/memory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import "allocator/tlsf";

// Exported wrapper over memory.allocate(): requests `size` bytes from the
// imported allocator and returns whatever pointer it hands back.
export function malloc(size: usize): usize {
  let ptr: usize = memory.allocate(size);
  return ptr;
}

// Exported wrapper over memory.free(): hands `ptr` back to the imported allocator.
export function free(ptr: usize): void {
  let block: usize = ptr;
  memory.free(block);
}
6 changes: 6 additions & 0 deletions assembly/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"extends": "../node_modules/assemblyscript/std/assembly.json",
"include": [
"./**/*.ts"
]
}
44 changes: 44 additions & 0 deletions assembly/utf8CountUint16Array.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { loadUint16BE } from "./be";

// Counts how many UTF-8 bytes are needed to encode a run of UTF-16 code units.
//
// inputPtr:    u16*, each code unit is read with loadUint16BE()
// inputLength: number of u16 code units (not bytes)
// returns:     the UTF-8 byte length
//
// A high surrogate immediately followed by a low surrogate is counted as one
// 4-byte sequence; any other surrogate is counted as 3 bytes on its own.
export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize {
  const unitSize = sizeof<u16>();

  let total: usize = 0;
  let cursor: usize = inputPtr;
  let limit = inputPtr + inputLength * unitSize;
  while (cursor < limit) {
    let unit: u32 = loadUint16BE(cursor);
    cursor += unitSize;

    // U+0000..U+007F
    if ((unit & 0xffffff80) === 0) {
      total += 1;
      continue;
    }

    // U+0080..U+07FF
    if ((unit & 0xfffff800) === 0) {
      total += 2;
      continue;
    }

    // A high surrogate may combine with the following low surrogate.
    if (unit >= 0xd800 && unit <= 0xdbff && cursor < limit) {
      let low: u32 = loadUint16BE(cursor);
      if ((low & 0xfc00) === 0xdc00) {
        cursor += unitSize;
        unit = ((unit & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      }
    }

    if ((unit & 0xffff0000) !== 0) {
      // combined pair, above U+FFFF
      total += 4;
    } else {
      // U+0800..U+FFFF, including unpaired surrogates
      total += 3;
    }
  }
  return total;
}
46 changes: 46 additions & 0 deletions assembly/utf8DecodeToUint16Array.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Decodes UTF-8 bytes into UTF-16 code units.
//
// outputPtr:  u16*, destination for the decoded code units
// inputPtr:   u8*, start of the UTF-8 bytes
// byteLength: number of input bytes
// returns:    the number of u16 code units written
//
// Code points above U+FFFF are written as a surrogate pair. A byte that is not
// a valid lead byte is copied through as a single code unit. Continuation
// bytes are not validated, and a sequence truncated at the end of the input
// is not detected (its missing bytes are read from past the end).
export function utf8DecodeToUint16Array(outputPtr: usize, inputPtr: usize, byteLength: usize): usize {
  const u16s = sizeof<u16>();
  let inputOffset = inputPtr;
  let outputOffset = outputPtr;
  let inputOffsetEnd = inputPtr + byteLength;

  while (inputOffset < inputOffsetEnd) {
    // All intermediates are explicit u32 so the 12- and 18-bit shifts below
    // are never performed on (or narrowed to) an 8/16-bit type.
    let byte1: u32 = load<u8>(inputOffset++);
    if ((byte1 & 0x80) === 0) {
      // 1 byte
      store<u16>(outputOffset, byte1);
    } else if ((byte1 & 0xe0) === 0xc0) {
      // 2 bytes
      let byte2: u32 = load<u8>(inputOffset++) & 0x3f;
      // FIXME: consider endians
      store<u16>(outputOffset, ((byte1 & 0x1f) << 6) | byte2);
    } else if ((byte1 & 0xf0) === 0xe0) {
      // 3 bytes
      let byte2: u32 = load<u8>(inputOffset++) & 0x3f;
      let byte3: u32 = load<u8>(inputOffset++) & 0x3f;
      store<u16>(outputOffset, ((byte1 & 0x0f) << 12) | (byte2 << 6) | byte3);
    } else if ((byte1 & 0xf8) === 0xf0) {
      // 4 bytes
      let byte2: u32 = load<u8>(inputOffset++) & 0x3f;
      let byte3: u32 = load<u8>(inputOffset++) & 0x3f;
      let byte4: u32 = load<u8>(inputOffset++) & 0x3f;
      let codepoint: u32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
      if (codepoint > 0xffff) {
        // emit the high surrogate now; the low surrogate is stored below
        codepoint -= 0x10000;
        store<u16>(outputOffset, ((codepoint >>> 10) & 0x3ff) | 0xd800);
        outputOffset += u16s;
        codepoint = 0xdc00 | (codepoint & 0x3ff);
      }
      store<u16>(outputOffset, codepoint);
    } else {
      // invalid UTF-8: pass the byte through as one code unit.
      // The output pointer must advance by exactly one u16 here (done below);
      // an extra post-increment would misalign every following code unit.
      store<u16>(outputOffset, byte1);
    }
    // every branch leaves exactly one code unit stored at outputOffset
    outputOffset += u16s;
  }
  return (outputOffset - outputPtr) / u16s;
}
80 changes: 80 additions & 0 deletions assembly/utf8EncodeUint16Array.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { utf8CountUint16Array } from "./utf8CountUint16Array";
import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./be";

// Writes the MessagePack str header for a payload of `utf8ByteLength` bytes
// at `outputPtr` and returns the address just past the header.
//
//   length < 32           fixstr: one byte, 0xa0 + length
//   length < 0x100        str 8:  0xd9 followed by a u8 length
//   length < 0x10000      str 16: 0xda followed by a big-endian u16 length
//   length < 0x100000000  str 32: 0xdb followed by a big-endian u32 length
//
// Throws an Error for anything longer.
function storeStringHeader(outputPtr: usize, utf8ByteLength: usize): usize {
  let headerSize: usize;

  if (utf8ByteLength < 32) {
    // fixstr
    storeUint8BE(outputPtr, 0xa0 + (utf8ByteLength as u8));
    headerSize = 1;
  } else if (utf8ByteLength < 0x100) {
    // str 8
    storeUint8BE(outputPtr, 0xd9);
    storeUint8BE(outputPtr + 1, utf8ByteLength as u8);
    headerSize = 2;
  } else if (utf8ByteLength < 0x10000) {
    // str 16
    storeUint8BE(outputPtr, 0xda);
    storeUint16BE(outputPtr + 1, utf8ByteLength as u16);
    headerSize = 1 + sizeof<u16>();
  } else if ((utf8ByteLength as u64) < 0x100000000) {
    // str 32
    storeUint8BE(outputPtr, 0xdb);
    storeUint32BE(outputPtr + 1, utf8ByteLength as u32);
    headerSize = 1 + sizeof<u32>();
  } else {
    throw new Error(`Too long string: ${utf8ByteLength} bytes in UTF-8`);
  }

  return outputPtr + headerSize;
}

// Encodes UTF-16 code units as a MessagePack string: a str header followed by
// the UTF-8 bytes.
//
// outputPtr:   u8*, where the header and the UTF-8 bytes are written
// inputPtr:    u16*, each code unit is read with loadUint16BE()
// inputLength: number of u16 code units (not bytes)
// returns:     total bytes written, header included
//
// A high surrogate immediately followed by a low surrogate becomes one 4-byte
// sequence; any other surrogate is encoded on its own as 3 bytes.
export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLength: usize): usize {
  // The header encodes the payload size, so the size is counted up front.
  let payloadLength = utf8CountUint16Array(inputPtr, inputLength);
  let dst = storeStringHeader(outputPtr, payloadLength);

  const unitSize = sizeof<u16>();
  let src = inputPtr;
  let srcEnd = inputPtr + inputLength * unitSize;
  while (src < srcEnd) {
    let unit: u32 = loadUint16BE(src);
    src += unitSize;

    // 1-byte: stored as is
    if ((unit & 0xffffff80) === 0) {
      store<u8>(dst++, unit);
      continue;
    }

    // Multi-byte: write the lead byte and all but the last continuation byte
    // here; the last continuation byte is shared by every case below.
    if ((unit & 0xfffff800) === 0) {
      // 2-bytes
      store<u8>(dst++, ((unit >> 6) & 0x1f) | 0xc0);
    } else {
      // A high surrogate may combine with the following low surrogate.
      if (unit >= 0xd800 && unit <= 0xdbff && src < srcEnd) {
        let low: u32 = loadUint16BE(src);
        if ((low & 0xfc00) === 0xdc00) {
          src += unitSize;
          unit = ((unit & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
        }
      }

      if ((unit & 0xffff0000) !== 0) {
        // 4-byte
        store<u8>(dst++, ((unit >> 18) & 0x07) | 0xf0);
        store<u8>(dst++, ((unit >> 12) & 0x3f) | 0x80);
        store<u8>(dst++, ((unit >> 6) & 0x3f) | 0x80);
      } else {
        // 3-byte
        store<u8>(dst++, ((unit >> 12) & 0x0f) | 0xe0);
        store<u8>(dst++, ((unit >> 6) & 0x3f) | 0x80);
      }
    }

    // last continuation byte
    store<u8>(dst++, (unit & 0x3f) | 0x80);
  }

  return dst - outputPtr;
}
53 changes: 53 additions & 0 deletions benchmark/string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/* eslint-disable no-console */
import { encode, decode } from "../src";
import { WASM_AVAILABLE } from "../src/wasmFunctions";

// Number of timed encode() and decode() calls per data set.
const ITERATIONS = 1000;

// Runs one warm-up encode/decode round trip on `data`, logs the input and
// encoded sizes, then times ITERATIONS encode() calls followed by ITERATIONS
// decode() calls. `label` names the data set in the log and timer labels.
function runStringBenchmark(label: string, data: string): void {
  // warm up
  const encoded = encode(data);
  decode(encoded);
  console.log(`encode / decode ${label} data.length=${data.length} encoded.byteLength=${encoded.byteLength}`);

  // run
  const encodeTimer = `encode ${label}`;
  console.time(encodeTimer);
  for (let i = 0; i < ITERATIONS; i++) {
    encode(data);
  }
  console.timeEnd(encodeTimer);

  const decodeTimer = `decode ${label}`;
  console.time(decodeTimer);
  for (let i = 0; i < ITERATIONS; i++) {
    decode(encoded);
  }
  console.timeEnd(decodeTimer);
}

console.log(`WASM_AVAILABLE=${WASM_AVAILABLE}`);

const ascii = "A".repeat(40000);
const emoji = "🌏".repeat(20000);

runStringBenchmark("ascii", ascii);
runStringBenchmark("emoji", emoji);
Loading