Skip to content

Commit

Permalink
Speed up passing ASCII-only strings to WASM
Browse files Browse the repository at this point in the history
Some speed-up numbers from my string-heavy WASM benchmarks:
 - Firefox + encodeInto: +45%
 - Chrome + encodeInto: +80%
 - Firefox + encode: +29%
 - Chrome + encode: +62%

Note that this helps specifically in the case of lots of small ASCII strings; for large strings there is no measurable difference in either direction.
  • Loading branch information
RReverser committed Apr 30, 2019
1 parent 578d59e commit 46ccb35
Showing 1 changed file with 43 additions and 18 deletions.
61 changes: 43 additions & 18 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1344,18 +1344,51 @@ impl<'a> Context<'a> {
self.expose_text_encoder();
self.expose_uint8_memory();

// A fast path that directly writes char codes into WASM memory as long
// as it finds only ASCII characters.
//
// This is much faster for common ASCII strings because it can avoid
// calling out into C++ TextEncoder code.
//
// This might not be very intuitive, but such calls are usually more
// expensive in mainstream engines than staying in JS, and
// charCodeAt on ASCII strings is usually optimised to raw bytes.
let start_encoding_as_ascii = format!(
"
{}
let size = arg.length;
let ptr = wasm.__wbindgen_malloc(size);
let offset = 0;
{{
const mem = getUint8Memory();
for (; offset < arg.length; offset++) {{
const code = arg.charCodeAt(offset);
if (code > 0x7F) {{
arg = arg.slice(offset);
break;
}}
mem[ptr + offset] = code;
}}
}}
",
debug
);

// The first implementation we have for this is to use
// `TextEncoder#encode` which has been around for quite some time.
let use_encode = format!(
"
{}
const buf = cachedTextEncoder.encode(arg);
const ptr = wasm.__wbindgen_malloc(buf.length);
getUint8Memory().set(buf, ptr);
WASM_VECTOR_LEN = buf.length;
if (offset !== arg.length) {{
const buf = cachedTextEncoder.encode(arg);
ptr = wasm.__wbindgen_realloc(ptr, size, size += buf.length);
getUint8Memory().set(buf, ptr + offset);
offset += buf.length;
}}
WASM_VECTOR_LEN = offset;
return ptr;
",
debug
start_encoding_as_ascii
);

// Another possibility is to use `TextEncoder#encodeInto` which is much
Expand All @@ -1364,23 +1397,15 @@ impl<'a> Context<'a> {
let use_encode_into = format!(
"
{}
let size = arg.length;
let ptr = wasm.__wbindgen_malloc(size);
let writeOffset = 0;
while (true) {{
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
writeOffset += written;
if (read === arg.length) {{
break;
}}
arg = arg.substring(read);
if (offset !== arg.length) {{
ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3);
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
offset += cachedTextEncoder.encodeInto(arg, view).written;
}}
WASM_VECTOR_LEN = writeOffset;
WASM_VECTOR_LEN = offset;
return ptr;
",
debug
start_encoding_as_ascii
);

// Looks like `encodeInto` doesn't currently work when the memory passed
Expand Down

0 comments on commit 46ccb35

Please sign in to comment.