Support TextDecoder in pthreads mode (#14399)

Converting large strings from linear memory to JS is a lot faster with TextDecoder, but TextDecoder does not work on views of a SharedArrayBuffer (see whatwg/encoding#172). Until now we therefore avoided TextDecoder in pthreads builds and fell back to the path that creates a string one character at a time. That path can be quite pathological, however, incurring quadratic time in the worst case. With this PR we instead still use TextDecoder, by first copying the data to a normal ArrayBuffer. The extra copy adds some cost, but it is at least linear and predictable, and benchmarks show it is much faster on large strings.
emscripten-core · Jun 9, 2021 · 9af077b · 9af077b
1 parent d772e68
commit 9af077b
Show file tree

Hide file tree

Showing 6 changed files with 59 additions and 9 deletions.
diff --git a/emcc.py b/emcc.py
@@ -1840,8 +1840,6 @@ def default_setting(name, new_default):
       exit_with_error('USE_PTHREADS=2 is no longer supported')
     if settings.ALLOW_MEMORY_GROWTH:
       diagnostics.warning('pthreads-mem-growth', 'USE_PTHREADS + ALLOW_MEMORY_GROWTH may run non-wasm code slowly, see https://github.com/WebAssembly/design/issues/1271')
-    # UTF8Decoder.decode may not work with a view of a SharedArrayBuffer, see https://github.com/whatwg/encoding/issues/172
-    settings.TEXTDECODER = 0
     settings.SYSTEM_JS_LIBRARIES.append((0, shared.path_from_root('src', 'library_pthread.js')))
     settings.EXPORTED_FUNCTIONS += [
       '___emscripten_pthread_data_constructor',

diff --git a/src/library_strings.js b/src/library_strings.js
@@ -6,6 +6,14 @@
 
 #if MINIMAL_RUNTIME
 
+// runtime_strings_extra.js can use TextDecoderWrapper, which runtime_strings.js
+// defines in the generated code. The minimal runtime logic here actually runs
+// the library code at compile time (as a way to create a library*.js file
+// around non-library JS), and so we must define it here as well.
+var TextDecoderWrapper = TextDecoder;
+
+// TODO: stop including this in such a manner, and instead make it a normal
+// library file in all modes.
 #include "runtime_strings_extra.js"
 #include "arrayUtils.js"
 

diff --git a/src/runtime_strings.js b/src/runtime_strings.js
@@ -9,11 +9,34 @@
 // Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the given array that contains uint8 values, returns
 // a copy of that string as a Javascript String object.
 
+#if USE_PTHREADS && TEXTDECODER
+// TextDecoder.decode may not work with a view of a SharedArrayBuffer, see
+// https://github.com/whatwg/encoding/issues/172
+// To avoid that, we wrap TextDecoder and copy the data into a normal
+// ArrayBuffer first, which can still be much faster than creating a string
+// character by character.
+function TextDecoderWrapper(encoding) {
+  var textDecoder = new TextDecoder(encoding);
+  this.decode = function(data) {
+#if ASSERTIONS
+    assert(data instanceof Uint8Array);
+#endif
+    // While we compile with pthreads, this method can be called on side buffers
+    // as well, such as the stdout buffer in the filesystem code. Only copy when
+    // we have to.
+    if (data.buffer instanceof SharedArrayBuffer) {
+      data = new Uint8Array(data);
+    }
+    return textDecoder.decode.call(textDecoder, data);
+  };
+}
+#endif
+
 #if TEXTDECODER == 2
-var UTF8Decoder = new TextDecoder('utf8');
+var UTF8Decoder = new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf8');
 #else // TEXTDECODER == 2
 #if TEXTDECODER
-var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
+var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf8') : undefined;
 #endif // TEXTDECODER
 #endif // TEXTDECODER == 2
 

diff --git a/src/runtime_strings_extra.js b/src/runtime_strings_extra.js
@@ -32,10 +32,10 @@ function stringToAscii(str, outPtr) {
 // a copy of that string as a Javascript String object.
 
 #if TEXTDECODER == 2
-var UTF16Decoder = new TextDecoder('utf-16le');
+var UTF16Decoder = new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf-16le');
 #else // TEXTDECODER == 2
 #if TEXTDECODER
-var UTF16Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-16le') : undefined;
+var UTF16Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf-16le') : undefined;
 #endif // TEXTDECODER
 #endif // TEXTDECODER == 2
 

diff --git a/tests/benchmark_utf8.cpp b/tests/benchmark_utf8.cpp
@@ -57,8 +57,10 @@ int main() {
   double t = 0;
   double t2 = emscripten_get_now();
   for(int i = 0; i < 100000; ++i) {
-    // FF Nightly: Already on small strings of 64 bytes in length, TextDecoder trumps in performance.
-    char *str = randomString(8);
+    // Create strings of lengths 1-32, because the internals of text decoding
+    // have a cutoff of 16 for when to use TextDecoder, and we wish to test both
+    // (see UTF8ArrayToString).
+    char *str = randomString((i % 32) + 1);
     t += test(str);
     delete [] str;
   }

diff --git a/tests/test_browser.py b/tests/test_browser.py
@@ -131,6 +131,16 @@ def decorated(self, *args, **kwargs):
   return decorated
 
 
+def also_with_threads(f):
+  def decorated(self):
+    f(self)
+    if not os.environ.get('EMTEST_LACKS_THREAD_SUPPORT'):
+      print('(threads)')
+      self.emcc_args += ['-pthread']
+      f(self)
+  return decorated
+
+
 # Today we only support the wasm backend so any test that is disabled under the llvm
 # backend is always disabled.
 # TODO(sbc): Investigate all tests with this decorator and either fix or remove the test.
@@ -4231,20 +4241,29 @@ def test_wasm_locate_file(self):
     shutil.move('test.wasm', Path('cdn/test.wasm'))
     self.run_browser('test.html', '', '/report_result?0')
 
+  @also_with_threads
   def test_utf8_textdecoder(self):
     self.btest_exit('benchmark_utf8.cpp', 0, args=['--embed-file', test_file('utf8_corpus.txt') + '@/utf8_corpus.txt', '-s', 'EXPORTED_RUNTIME_METHODS=[UTF8ToString]'])
 
+  @also_with_threads
   def test_utf16_textdecoder(self):
     self.btest_exit('benchmark_utf16.cpp', 0, args=['--embed-file', test_file('utf16_corpus.txt') + '@/utf16_corpus.txt', '-s', 'EXPORTED_RUNTIME_METHODS=[UTF16ToString,stringToUTF16,lengthBytesUTF16]'])
 
+  @also_with_threads
   def test_TextDecoder(self):
     self.btest('browser_test_hello_world.c', '0', args=['-s', 'TEXTDECODER=0'])
     just_fallback = os.path.getsize('test.js')
     self.btest('browser_test_hello_world.c', '0')
     td_with_fallback = os.path.getsize('test.js')
     self.btest('browser_test_hello_world.c', '0', args=['-s', 'TEXTDECODER=2'])
     td_without_fallback = os.path.getsize('test.js')
-    self.assertLess(td_without_fallback, just_fallback)
+    # pthread TextDecoder support is more complex due to
+    # https://github.com/whatwg/encoding/issues/172
+    # and therefore the expected code size win there is actually a loss
+    if '-pthread' not in self.emcc_args:
+      self.assertLess(td_without_fallback, just_fallback)
+    else:
+      self.assertGreater(td_without_fallback, just_fallback)
     self.assertLess(just_fallback, td_with_fallback)
 
   def test_small_js_flags(self):