Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up passing ASCII-only strings to WASM #1470

Merged
merged 4 commits into from
May 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 49 additions & 19 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1445,18 +1445,48 @@ impl<'a> Context<'a> {
self.expose_text_encoder();
self.expose_uint8_memory();

// A fast path that directly writes char codes into WASM memory as long
// as it finds only ASCII characters.
//
// This is much faster for common ASCII strings because it can avoid
// calling out into C++ TextEncoder code.
//
// This might not be very intuitive, but such calls are usually more
// expensive in mainstream engines than staying in the JS, and
// charCodeAt on ASCII strings is usually optimised to raw bytes.
let start_encoding_as_ascii = format!(
"
{}
let size = arg.length;
let ptr = wasm.__wbindgen_malloc(size);
let offset = 0;
{{
const mem = getUint8Memory();
for (; offset < arg.length; offset++) {{
const code = arg.charCodeAt(offset);
if (code > 0x7F) break;
mem[ptr + offset] = code;
}}
}}
",
debug
);

// The first implementation we have for this is to use
// `TextEncoder#encode` which has been around for quite some time.
let use_encode = format!(
"
{}
const buf = cachedTextEncoder.encode(arg);
const ptr = wasm.__wbindgen_malloc(buf.length);
getUint8Memory().set(buf, ptr);
WASM_VECTOR_LEN = buf.length;
if (offset !== arg.length) {{
const buf = cachedTextEncoder.encode(arg.slice(offset));
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length);
getUint8Memory().set(buf, ptr + offset);
offset += buf.length;
}}
WASM_VECTOR_LEN = offset;
return ptr;
",
debug
start_encoding_as_ascii
);

// Another possibility is to use `TextEncoder#encodeInto` which is much
Expand All @@ -1465,23 +1495,23 @@ impl<'a> Context<'a> {
let use_encode_into = format!(
"
{}
let size = arg.length;
let ptr = wasm.__wbindgen_malloc(size);
let writeOffset = 0;
while (true) {{
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
writeOffset += written;
if (read === arg.length) {{
break;
}}
arg = arg.substring(read);
ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3);
if (offset !== arg.length) {{
arg = arg.slice(offset);
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3);
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
const ret = cachedTextEncoder.encodeInto(arg, view);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alexcrichton I see you already merged it, but this change... doesn't seem right. Why is it encoding same view twice now?

{}
offset += cachedTextEncoder.encodeInto(arg, view).written;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could a debug assert of some form be included after this to ensure that it wrote everything?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't exactly use debug_assert in the JS code, but I guess I could add something if debug_assertions is on...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a --debug flag to wasm-bindgen itself which can control whether the assertion is emitted or not

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah great.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'm not sure what to check here. I mean, I'm passing all that's left of arg to encodeInto, so it has to be written fully - what condition would I check to prove it?

}}
WASM_VECTOR_LEN = writeOffset;
WASM_VECTOR_LEN = offset;
return ptr;
",
debug
start_encoding_as_ascii,
if self.config.debug {
"if (ret.read != arg.length) throw new Error('failed to pass whole string');"
} else {
""
},
);

// Looks like `encodeInto` doesn't currently work when the memory passed
Expand Down
1 change: 1 addition & 0 deletions tests/headless/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ pub fn import_export_same_name() {
pub mod snippets;
pub mod modules;
pub mod anyref_heap_live_count;
pub mod strings;
15 changes: 15 additions & 0 deletions tests/headless/strings.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Feeds a fixed list of sample strings (empty, ASCII-only, and strings
 * containing a non-ASCII emoji) through `f`, one at a time and in order.
 *
 * @param {function(string): string} f - function expected to return its
 *   argument unchanged.
 * @throws {Error} on the first sample for which `f` returns a different
 *   string than it was given.
 */
export function test_string_roundtrip(f) {
  const samples = [
    '',
    'a',
    '💖',
    'a longer string',
    'a longer 💖 string',
  ];

  for (const expected of samples) {
    const actual = f(expected);
    if (actual !== expected) {
      throw new Error(`string roundtrip "${actual}" != "${expected}"`);
    }
  }
}
12 changes: 12 additions & 0 deletions tests/headless/strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
use wasm_bindgen::prelude::*;
use wasm_bindgen_test::*;

// JS-side driver for the string round-trip test, imported from the
// `strings.js` module that sits next to this file.
#[wasm_bindgen(module = "/tests/headless/strings.js")]
extern "C" {
    /// Calls `c` with a series of sample strings and throws a JS error if any
    /// returned string differs from the one passed in.
    ///
    /// The closure type is spelled with an explicit `dyn`: the bare
    /// trait-object form (`Closure<Fn(..)>`) is deprecated and is rejected
    /// outright in newer editions, while `dyn Fn(..)` names the same type.
    fn test_string_roundtrip(c: &Closure<dyn Fn(String) -> String>);
}

/// Hands the imported JS `test_string_roundtrip` an identity closure
/// (`|s| s`), so every string JS passes in is returned unchanged.
#[wasm_bindgen_test]
fn string_roundtrip() {
// The closure's concrete type is inferred from the parameter type of the
// imported function, so it is built directly in argument position.
test_string_roundtrip(&Closure::wrap(Box::new(|s| s)));
}
13 changes: 13 additions & 0 deletions tests/wasm/simple.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,16 @@ exports.RenamedInRust = class {};
exports.new_renamed = () => new exports.RenamedInRust;

exports.import_export_same_name = () => {};

exports.test_string_roundtrip = () => {
const test = s => {
assert.strictEqual(wasm.do_string_roundtrip(s), s);
};

test('');
test('a');
test('💖');

test('a longer string');
test('a longer 💖 string');
};
12 changes: 12 additions & 0 deletions tests/wasm/simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ extern "C" {
#[wasm_bindgen(js_name = RenamedInRust)]
type Renamed;
fn new_renamed() -> Renamed;

fn test_string_roundtrip();
}

#[wasm_bindgen_test]
Expand Down Expand Up @@ -201,3 +203,13 @@ fn renaming_imports_and_instanceof() {
pub fn import_export_same_name() {
js_import_export_same_name();
}

/// Runs the imported `test_string_roundtrip` function, which takes no
/// arguments and returns nothing; any checking happens on the other side of
/// that call.
#[wasm_bindgen_test]
fn string_roundtrip() {
test_string_roundtrip();
}

/// Identity function exported through `#[wasm_bindgen]`: takes an owned
/// `String` and returns it unchanged.
#[wasm_bindgen]
pub fn do_string_roundtrip(s: String) -> String {
s
}