-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speed up passing ASCII-only strings to WASM #1470
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1445,18 +1445,48 @@ impl<'a> Context<'a> { | |
self.expose_text_encoder(); | ||
self.expose_uint8_memory(); | ||
|
||
// A fast path that directly writes char codes into WASM memory as long | ||
// as it finds only ASCII characters. | ||
// | ||
// This is much faster for common ASCII strings because it can avoid | ||
// calling out into C++ TextEncoder code. | ||
// | ||
// This might not be very intuitive, but such calls are usually more | ||
// expensive in mainstream engines than staying in JS, and | ||
// charCodeAt on ASCII strings is usually optimised to raw bytes. | ||
let start_encoding_as_ascii = format!( | ||
" | ||
{} | ||
let size = arg.length; | ||
let ptr = wasm.__wbindgen_malloc(size); | ||
let offset = 0; | ||
{{ | ||
const mem = getUint8Memory(); | ||
for (; offset < arg.length; offset++) {{ | ||
const code = arg.charCodeAt(offset); | ||
if (code > 0x7F) break; | ||
mem[ptr + offset] = code; | ||
}} | ||
}} | ||
", | ||
debug | ||
); | ||
|
||
// The first implementation we have for this is to use | ||
// `TextEncoder#encode` which has been around for quite some time. | ||
let use_encode = format!( | ||
" | ||
{} | ||
const buf = cachedTextEncoder.encode(arg); | ||
const ptr = wasm.__wbindgen_malloc(buf.length); | ||
getUint8Memory().set(buf, ptr); | ||
WASM_VECTOR_LEN = buf.length; | ||
if (offset !== arg.length) {{ | ||
const buf = cachedTextEncoder.encode(arg.slice(offset)); | ||
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length); | ||
getUint8Memory().set(buf, ptr + offset); | ||
offset += buf.length; | ||
}} | ||
WASM_VECTOR_LEN = offset; | ||
return ptr; | ||
", | ||
debug | ||
start_encoding_as_ascii | ||
); | ||
|
||
// Another possibility is to use `TextEncoder#encodeInto` which is much | ||
|
@@ -1465,23 +1495,23 @@ impl<'a> Context<'a> { | |
let use_encode_into = format!( | ||
" | ||
{} | ||
let size = arg.length; | ||
let ptr = wasm.__wbindgen_malloc(size); | ||
let writeOffset = 0; | ||
while (true) {{ | ||
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size); | ||
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view); | ||
writeOffset += written; | ||
if (read === arg.length) {{ | ||
break; | ||
}} | ||
arg = arg.substring(read); | ||
ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3); | ||
if (offset !== arg.length) {{ | ||
arg = arg.slice(offset); | ||
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3); | ||
const view = getUint8Memory().subarray(ptr + offset, ptr + size); | ||
const ret = cachedTextEncoder.encodeInto(arg, view); | ||
{} | ||
offset += cachedTextEncoder.encodeInto(arg, view).written; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could a debug assert of some form be included after this to ensure that it wrote everything? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can't exactly use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah great. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, I'm not sure what to check here. I mean, I'm passing all that's left of |
||
}} | ||
WASM_VECTOR_LEN = writeOffset; | ||
WASM_VECTOR_LEN = offset; | ||
return ptr; | ||
", | ||
debug | ||
start_encoding_as_ascii, | ||
if self.config.debug { | ||
"if (ret.read != arg.length) throw new Error('failed to pass whole string');" | ||
} else { | ||
"" | ||
}, | ||
); | ||
|
||
// Looks like `encodeInto` doesn't currently work when the memory passed | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
/**
 * Checks that `f` returns each sample string unchanged.
 *
 * `f` is called once per sample, in order; the first sample that does not
 * come back strictly equal (`===`) aborts the run.
 *
 * @param {(s: string) => string} f - Function under test.
 * @throws {Error} When `f(sample) !== sample` for some sample; the message
 *   contains both the actual and the expected string.
 */
export function test_string_roundtrip(f) {
  const test = (expected) => {
    const actual = f(expected);
    if (actual === expected) {
      return;
    }
    throw new Error(`string roundtrip "${actual}" != "${expected}"`);
  };

  // Empty string, a single ASCII character, and a lone non-ASCII character
  // (U+1F496, which is a surrogate pair in a JS string).
  test('');
  test('a');
  test('💖');

  // A longer ASCII-only string, then the same with a non-ASCII character in
  // the middle so that an ASCII prefix is followed by non-ASCII content.
  test('a longer string');
  test('a longer 💖 string');
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
use wasm_bindgen::prelude::*; | ||
use wasm_bindgen_test::*; | ||
|
||
#[wasm_bindgen(module = "/tests/headless/strings.js")] | ||
extern "C" { | ||
fn test_string_roundtrip(c: &Closure<Fn(String) -> String>); | ||
} | ||
|
||
#[wasm_bindgen_test] | ||
fn string_roundtrip() { | ||
test_string_roundtrip(&Closure::wrap(Box::new(|s| s))); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@alexcrichton I see you already merged it, but this change... doesn't seem right. Why is it encoding the same view twice now?