From 57b1a57c5e221d5d66a34df9e6152c45de8da561 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Wed, 17 Apr 2019 18:14:53 +0100 Subject: [PATCH 1/4] Speed up passing ASCII-only strings to WASM Some speed up numbers from my string-heavy WASM benchmarks: - Firefox + encodeInto: +45% - Chrome + encodeInto: +80% - Firefox + encode: +29% - Chrome + encode: +62% Note that this helps specifically with case of lots of small ASCII strings, in case of large strings there is no measurable difference in either direction. --- crates/cli-support/src/js/mod.rs | 61 ++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index f5dade92a14..12d569c3246 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -1445,18 +1445,51 @@ impl<'a> Context<'a> { self.expose_text_encoder(); self.expose_uint8_memory(); + // A fast path that directly writes char codes into WASM memory as long + // as it finds only ASCII characters. + // + // This is much faster for common ASCII strings because it can avoid + // calling out into C++ TextEncoder code. + // + // This might be not very intuitive, but such calls are usually more + // expensive in mainstream engines than staying in the JS, and + // charCodeAt on ASCII strings is usually optimised to raw bytes. + let start_encoding_as_ascii = format!( + " + {} + let size = arg.length; + let ptr = wasm.__wbindgen_malloc(size); + let offset = 0; + {{ + const mem = getUint8Memory(); + for (; offset < arg.length; offset++) {{ + const code = arg.charCodeAt(offset); + if (code > 0x7F) {{ + arg = arg.slice(offset); + break; + }} + mem[ptr + offset] = code; + }} + }} + ", + debug + ); + // The first implementation we have for this is to use // `TextEncoder#encode` which has been around for quite some time. let use_encode = format!( " {} - const buf = cachedTextEncoder.encode(arg); - const ptr = wasm.__wbindgen_malloc(buf.length); - getUint8Memory().set(buf, ptr); - WASM_VECTOR_LEN = buf.length; + if (offset !== arg.length) {{ + const buf = cachedTextEncoder.encode(arg); + ptr = wasm.__wbindgen_realloc(ptr, size, size += buf.length); + getUint8Memory().set(buf, ptr + offset); + offset += buf.length; + }} + WASM_VECTOR_LEN = offset; return ptr; ", - debug + start_encoding_as_ascii ); // Another possibility is to use `TextEncoder#encodeInto` which is much @@ -1465,23 +1498,15 @@ impl<'a> Context<'a> { let use_encode_into = format!( " {} - let size = arg.length; - let ptr = wasm.__wbindgen_malloc(size); - let writeOffset = 0; - while (true) {{ - const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size); - const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view); - writeOffset += written; - if (read === arg.length) {{ - break; - }} - arg = arg.substring(read); + if (offset !== arg.length) {{ ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3); + const view = getUint8Memory().subarray(ptr + offset, ptr + size); + offset += cachedTextEncoder.encodeInto(arg, view).written; }} - WASM_VECTOR_LEN = writeOffset; + WASM_VECTOR_LEN = offset; return ptr; ", - debug + start_encoding_as_ascii ); // Looks like `encodeInto` doesn't currently work when the memory passed From 7418cec613385ddf2f2adb9517438223946b46cf Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Fri, 10 May 2019 14:44:49 +0100 Subject: [PATCH 2/4] Reduce reallocation sizes --- crates/cli-support/src/js/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index 12d569c3246..7445cc5c53c 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -1482,7 +1482,7 @@ impl<'a> Context<'a> { {} if (offset !== arg.length) {{ const buf = cachedTextEncoder.encode(arg); - ptr = wasm.__wbindgen_realloc(ptr, size, size += buf.length); + ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length); getUint8Memory().set(buf, ptr + offset); offset += buf.length; }} @@ -1499,7 +1499,7 @@ impl<'a> Context<'a> { " {} if (offset !== arg.length) {{ - ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3); + ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3); const view = getUint8Memory().subarray(ptr + offset, ptr + size); offset += cachedTextEncoder.encodeInto(arg, view).written; }} From 0c681ee2baac066cbef8e8891052b73c5eec4d77 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Fri, 10 May 2019 15:31:59 +0100 Subject: [PATCH 3/4] Fix offset to arg comparison --- crates/cli-support/src/js/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index 7445cc5c53c..539c864cc9d 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -1464,10 +1464,7 @@ impl<'a> Context<'a> { const mem = getUint8Memory(); for (; offset < arg.length; offset++) {{ const code = arg.charCodeAt(offset); - if (code > 0x7F) {{ - arg = arg.slice(offset); - break; - }} + if (code > 0x7F) break; mem[ptr + offset] = code; }} }} @@ -1481,7 +1478,7 @@ impl<'a> Context<'a> { " {} if (offset !== arg.length) {{ - const buf = cachedTextEncoder.encode(arg); + const buf = cachedTextEncoder.encode(arg.slice(offset)); ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length); getUint8Memory().set(buf, ptr + offset); offset += buf.length; @@ -1499,6 +1496,7 @@ impl<'a> Context<'a> { " {} if (offset !== arg.length) {{ + arg = arg.slice(offset); ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3); const view = getUint8Memory().subarray(ptr + offset, ptr + size); offset += cachedTextEncoder.encodeInto(arg, view).written; From 15defcfd3ac092727841b11e1867e6a489797f16 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 13 May 2019 08:12:32 -0700 Subject: [PATCH 4/4] Add a debug assert and more tests --- crates/cli-support/src/js/mod.rs | 9 ++++++++- tests/headless/main.rs | 1 + tests/headless/strings.js | 15 +++++++++++++++ tests/headless/strings.rs | 12 ++++++++++++ tests/wasm/simple.js | 13 +++++++++++++ tests/wasm/simple.rs | 12 ++++++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tests/headless/strings.js create mode 100644 tests/headless/strings.rs diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index 539c864cc9d..109468690d1 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -1499,12 +1499,19 @@ impl<'a> Context<'a> { arg = arg.slice(offset); ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3); const view = getUint8Memory().subarray(ptr + offset, ptr + size); + const ret = cachedTextEncoder.encodeInto(arg, view); + {} offset += cachedTextEncoder.encodeInto(arg, view).written; }} WASM_VECTOR_LEN = offset; return ptr; ", - start_encoding_as_ascii + start_encoding_as_ascii, + if self.config.debug { + "if (ret.read != arg.length) throw new Error('failed to pass whole string');" + } else { + "" + }, ); // Looks like `encodeInto` doesn't currently work when the memory passed diff --git a/tests/headless/main.rs b/tests/headless/main.rs index fcc3ac6cc66..88bffc75e88 100755 --- a/tests/headless/main.rs +++ b/tests/headless/main.rs @@ -50,3 +50,4 @@ pub fn import_export_same_name() { pub mod snippets; pub mod modules; pub mod anyref_heap_live_count; +pub mod strings; diff --git a/tests/headless/strings.js b/tests/headless/strings.js new file mode 100644 index 00000000000..f22b994b57b --- /dev/null +++ b/tests/headless/strings.js @@ -0,0 +1,15 @@ +export function test_string_roundtrip(f) { + const test = expected => { + const actual = f(expected); + if (actual === expected) + return; + throw new Error(`string roundtrip "${actual}" != "${expected}"`); + }; + + test(''); + test('a'); + test('💖'); + + test('a longer string'); + test('a longer 💖 string'); +} diff --git a/tests/headless/strings.rs b/tests/headless/strings.rs new file mode 100644 index 00000000000..1c752d75484 --- /dev/null +++ b/tests/headless/strings.rs @@ -0,0 +1,12 @@ +use wasm_bindgen::prelude::*; +use wasm_bindgen_test::*; + +#[wasm_bindgen(module = "/tests/headless/strings.js")] +extern "C" { + fn test_string_roundtrip(c: &Closure String>); +} + +#[wasm_bindgen_test] +fn string_roundtrip() { + test_string_roundtrip(&Closure::wrap(Box::new(|s| s))); +} diff --git a/tests/wasm/simple.js b/tests/wasm/simple.js index c9b1ba6410a..5ba8b0e5d8b 100644 --- a/tests/wasm/simple.js +++ b/tests/wasm/simple.js @@ -92,3 +92,16 @@ exports.RenamedInRust = class {}; exports.new_renamed = () => new exports.RenamedInRust; exports.import_export_same_name = () => {}; + +exports.test_string_roundtrip = () => { + const test = s => { + assert.strictEqual(wasm.do_string_roundtrip(s), s); + }; + + test(''); + test('a'); + test('💖'); + + test('a longer string'); + test('a longer 💖 string'); +}; diff --git a/tests/wasm/simple.rs b/tests/wasm/simple.rs index b9f89df08ec..814cbcc022d 100644 --- a/tests/wasm/simple.rs +++ b/tests/wasm/simple.rs @@ -27,6 +27,8 @@ extern "C" { #[wasm_bindgen(js_name = RenamedInRust)] type Renamed; fn new_renamed() -> Renamed; + + fn test_string_roundtrip(); } #[wasm_bindgen_test] @@ -201,3 +203,13 @@ fn renaming_imports_and_instanceof() { pub fn import_export_same_name() { js_import_export_same_name(); } + +#[wasm_bindgen_test] +fn string_roundtrip() { + test_string_roundtrip(); +} + +#[wasm_bindgen] +pub fn do_string_roundtrip(s: String) -> String { + s +}