From 74a6bd1ae67d31d4efc4283072482e94487921d1 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Mon, 10 May 2021 13:44:35 +0000 Subject: [PATCH 1/5] Emit bundler-friendly URL locators This PR changes the loading of the main Wasm binary, as well as of the helper Worker used by the PThread integration, to use a URL expression that can be both used directly by browsers and statically detected by bundlers like Webpack. The main caveats are: 1. Emscripten is very configurable, so some of the new conditions might look odd, but they are necessary to keep backward compatibility and to allow overriding the bundler-friendly URL with a custom one at runtime. 2. None of Closure, our fork of Terser, or `eval` (which is used by Emscripten's JS library preprocessing) supports `import.meta` expressions without more work. While Closure seems to have _just_ implemented such support, and it wouldn't be too hard to add it to our Terser too, `eval` usage would still require a string replacement before execution (or a complete revamp). To keep the implementation simple, for now I went with a plain string replacement that covers all tools - this way, we replace `import.meta` -> `EMSCRIPTEN$IMPORT$META` only once per library when JS is added, before any of this tooling is executed, and then replace it back after everything is done, right before the final emit. We might want to revisit this in the future, but for now this works well and covers all the tooling incompatibilities together. 3. Webpack assumes that all modules are in strict mode, so I updated `worker.js` correspondingly to avoid using the global `this` (which is `undefined` in strict mode and breaks in bundled code) and to use `self` instead; I've also updated the Node.js adapter code to satisfy strict-mode requirements too and to be a bit simpler. 4. This won't work in Node.js, since it's not compatible with `EXPORT_ES6` in general yet. 5. I've only updated the places that load the main Wasm binary and the PThread code.
This should cover majority of use-cases, but other external files like side modules, proxy-to-pthread, proxy-to-worker, external memory loading etc. are not covered by this PR and need to be updated separately if someone wants them to work with bundlers out of the box too. Fixes #13571. --- emcc.py | 13 ++++++++++++- src/closure-externs/closure-externs.js | 3 +++ src/library_pthread.js | 17 ++++++++++++++++- src/parseTools.js | 8 ++++++++ src/preamble.js | 7 ++++++- src/settings_internal.js | 3 +++ src/worker.js | 2 +- 7 files changed, 49 insertions(+), 4 deletions(-) diff --git a/emcc.py b/emcc.py index 6cc5dd59b9758..4a93332b5d708 100755 --- a/emcc.py +++ b/emcc.py @@ -1206,7 +1206,8 @@ def phase_setup(state): else: target = 'a.out.js' - settings.TARGET_BASENAME = unsuffixed_basename(target) + settings.TARGET_BASENAME_WITH_EXT = os.path.basename(target) + settings.TARGET_BASENAME = unsuffixed(settings.TARGET_BASENAME_WITH_EXT) if settings.EXTRA_EXPORTED_RUNTIME_METHODS: diagnostics.warning('deprecated', 'EXTRA_EXPORTED_RUNTIME_METHODS is deprecated, please use EXPORTED_RUNTIME_METHODS instead') @@ -2534,6 +2535,16 @@ def phase_final_emitting(options, target, wasm_target, memfile): shared.JS.handle_license(final_js) shared.run_process([shared.PYTHON, shared.path_from_root('tools', 'hacky_postprocess_around_closure_limitations.py'), final_js]) + # Unmangle previously mangled `import.meta` references in both main code and libraries. + # See also: `preprocess` in parseTools.js. 
+ if settings.EXPORT_ES6 and settings.USE_ES6_IMPORT_META: + with open(final_js, 'r+') as f: + src = f.read() + src = src.replace('EMSCRIPTEN$IMPORT$META', 'import.meta') + f.seek(0) + f.write(src) + f.truncate() + # Apply pre and postjs files if options.extern_pre_js or options.extern_post_js: logger.debug('applying extern pre/postjses') diff --git a/src/closure-externs/closure-externs.js b/src/closure-externs/closure-externs.js index 5e81717de3ab3..37e1a4a0e5d54 100644 --- a/src/closure-externs/closure-externs.js +++ b/src/closure-externs/closure-externs.js @@ -11,6 +11,9 @@ * The closure_compiler() method in tools/shared.py refers to this file when calling closure. */ +// Special placeholder for `import.meta`. +var EMSCRIPTEN$IMPORT$META; + // Closure externs used by library_sockfs.js /** diff --git a/src/library_pthread.js b/src/library_pthread.js index 470f8325ac1b2..4bc001b758b7a 100644 --- a/src/library_pthread.js +++ b/src/library_pthread.js @@ -442,7 +442,11 @@ var LibraryPThread = { // it could load up the same file. In that case, developer must either deliver the Blob // object in Module['mainScriptUrlOrBlob'], or a URL to it, so that pthread Workers can // independently load up the same main application file. - 'urlOrBlob': Module['mainScriptUrlOrBlob'] || _scriptDir, + 'urlOrBlob': Module['mainScriptUrlOrBlob'] +#if !EXPORT_ES6 + || _scriptDir +#endif + , #if WASM2JS // the polyfill WebAssembly.Memory instance has function properties, // which will fail in postMessage, so just send a custom object with the @@ -469,6 +473,17 @@ var LibraryPThread = { #if MINIMAL_RUNTIME var pthreadMainJs = Module['worker']; #else +#if EXPORT_ES6 && USE_ES6_IMPORT_META + // If we're using module output and there's no explicit override, use bundler-friendly pattern. 
+ if (!Module['locateFile']) { +#if PTHREADS_DEBUG + out('Allocating a new web worker from ' + new URL('{{{ PTHREAD_WORKER_FILE }}}', import.meta.url)); +#endif + // Use bundler-friendly `new Worker(new URL(..., import.meta.url))` pattern; works in browsers too. + PThread.unusedWorkers.push(new Worker(new URL('{{{ PTHREAD_WORKER_FILE }}}', import.meta.url))); + return; + } +#endif // Allow HTML module to configure the location where the 'worker.js' file will be loaded from, // via Module.locateFile() function. If not specified, then the default URL 'worker.js' relative // to the main html file is loaded. diff --git a/src/parseTools.js b/src/parseTools.js index e2ff8f927f089..15e00bd285428 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -33,6 +33,14 @@ function processMacros(text) { // Param filenameHint can be passed as a description to identify the file that is being processed, used // to locate errors for reporting and for html files to stop expansion between . function preprocess(text, filenameHint) { + if (EXPORT_ES6 && USE_ES6_IMPORT_META) { + // `eval`, Terser and Closure don't support module syntax; to allow it, + // we need to temporarily replace `import.meta` usages with placeholders + // during preprocess phase, and back after all the other ops. + // See also: `phase_final_emitting` in emcc.py. + text = text.replace(/\bimport\.meta\b/g, 'EMSCRIPTEN$IMPORT$META'); + } + const IGNORE = 0; const SHOW = 1; // This state is entered after we have shown one of the block of an if/elif/else sequence. diff --git a/src/preamble.js b/src/preamble.js index 8c5c422ebb410..35b52331c89bd 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -746,10 +746,15 @@ function instrumentWasmTableWithAbort() { } #endif +#if EXPORT_ES6 +// Use bundler-friendly `new URL(..., import.meta.url)` pattern; works in browsers too. 
+var wasmBinaryFile = new URL('{{{ WASM_BINARY_FILE }}}', import.meta.url).toString(); +#else var wasmBinaryFile = '{{{ WASM_BINARY_FILE }}}'; if (!isDataURI(wasmBinaryFile)) { wasmBinaryFile = locateFile(wasmBinaryFile); } +#endif function getBinary(file) { try { @@ -809,7 +814,7 @@ function getBinaryPromise() { } #endif } - + // Otherwise, getBinary should be able to get it synchronously return Promise.resolve().then(function() { return getBinary(wasmBinaryFile); }); } diff --git a/src/settings_internal.js b/src/settings_internal.js index 8ff5ef7e678f9..031b5434aefe6 100644 --- a/src/settings_internal.js +++ b/src/settings_internal.js @@ -32,6 +32,9 @@ var SIDE_MODULE_IMPORTS = []; // stores the base name of the output file (-o TARGET_BASENAME.js) var TARGET_BASENAME = ''; +// stores the base name of the output file with extension (TARGET_BASENAME.js or TARGET_BASENAME.mjs) +var TARGET_BASENAME_WITH_EXT = ''; + // Indicates that the syscalls (which we see statically) indicate that they need // full filesystem support. Otherwise, when just a small subset are used, we can // get away without including the full filesystem - in particular, if open() is diff --git a/src/worker.js b/src/worker.js index 4aea79943c87a..232ca55394d8e 100644 --- a/src/worker.js +++ b/src/worker.js @@ -168,7 +168,7 @@ self.onmessage = function(e) { #endif #if MODULARIZE && EXPORT_ES6 - import(e.data.urlOrBlob).then(function({{{ EXPORT_NAME }}}) { + (e.data.urlOrBlob ? 
import(e.data.urlOrBlob) : import('./{{{ TARGET_BASENAME_WITH_EXT }}}')).then(function({{{ EXPORT_NAME }}}) { return {{{ EXPORT_NAME }}}.default(Module); }).then(function(instance) { Module = instance; From 44128e2cc295d7274abb3aef65d11136b8ab4b89 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Mon, 10 May 2021 15:15:15 +0000 Subject: [PATCH 2/5] Calculate target JS name more reliably --- emcc.py | 14 ++++++++------ src/settings_internal.js | 4 ++-- src/worker.js | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/emcc.py b/emcc.py index 4a93332b5d708..202878289d5bf 100755 --- a/emcc.py +++ b/emcc.py @@ -1206,8 +1206,13 @@ def phase_setup(state): else: target = 'a.out.js' - settings.TARGET_BASENAME_WITH_EXT = os.path.basename(target) - settings.TARGET_BASENAME = unsuffixed(settings.TARGET_BASENAME_WITH_EXT) + if options.oformat in (OFormat.JS, OFormat.MJS): + js_target = target + else: + js_target = get_secondary_target(target, '.js') + settings.TARGET_JS_NAME = js_target + + settings.TARGET_BASENAME = unsuffixed_basename(target) if settings.EXTRA_EXPORTED_RUNTIME_METHODS: diagnostics.warning('deprecated', 'EXTRA_EXPORTED_RUNTIME_METHODS is deprecated, please use EXPORTED_RUNTIME_METHODS instead') @@ -2558,10 +2563,7 @@ def phase_final_emitting(options, target, wasm_target, memfile): shared.JS.handle_license(final_js) - if options.oformat in (OFormat.JS, OFormat.MJS): - js_target = target - else: - js_target = get_secondary_target(target, '.js') + js_target = settings.TARGET_JS_NAME # The JS is now final. 
Move it to its final location move_file(final_js, js_target) diff --git a/src/settings_internal.js b/src/settings_internal.js index 031b5434aefe6..959827d55a172 100644 --- a/src/settings_internal.js +++ b/src/settings_internal.js @@ -32,8 +32,8 @@ var SIDE_MODULE_IMPORTS = []; // stores the base name of the output file (-o TARGET_BASENAME.js) var TARGET_BASENAME = ''; -// stores the base name of the output file with extension (TARGET_BASENAME.js or TARGET_BASENAME.mjs) -var TARGET_BASENAME_WITH_EXT = ''; +// stores the base name (with extension) of the output JS file +var TARGET_JS_NAME = ''; // Indicates that the syscalls (which we see statically) indicate that they need // full filesystem support. Otherwise, when just a small subset are used, we can diff --git a/src/worker.js b/src/worker.js index 232ca55394d8e..31528497a808e 100644 --- a/src/worker.js +++ b/src/worker.js @@ -168,7 +168,7 @@ self.onmessage = function(e) { #endif #if MODULARIZE && EXPORT_ES6 - (e.data.urlOrBlob ? import(e.data.urlOrBlob) : import('./{{{ TARGET_BASENAME_WITH_EXT }}}')).then(function({{{ EXPORT_NAME }}}) { + (e.data.urlOrBlob ? import(e.data.urlOrBlob) : import('./{{{ TARGET_JS_NAME }}}')).then(function({{{ EXPORT_NAME }}}) { return {{{ EXPORT_NAME }}}.default(Module); }).then(function(instance) { Module = instance; From 1387b7a7b71336e2ecc76d378204ccc000489f70 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Mon, 10 May 2021 15:22:32 +0000 Subject: [PATCH 3/5] Match existing intermediate file pattern --- emcc.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/emcc.py b/emcc.py index 202878289d5bf..23e34f7c682c6 100755 --- a/emcc.py +++ b/emcc.py @@ -2543,12 +2543,11 @@ def phase_final_emitting(options, target, wasm_target, memfile): # Unmangle previously mangled `import.meta` references in both main code and libraries. # See also: `preprocess` in parseTools.js. 
if settings.EXPORT_ES6 and settings.USE_ES6_IMPORT_META: - with open(final_js, 'r+') as f: - src = f.read() - src = src.replace('EMSCRIPTEN$IMPORT$META', 'import.meta') - f.seek(0) - f.write(src) - f.truncate() + src = open(final_js).read() + final_js += '.esmeta.js' + with open(final_js, 'w') as f: + f.write(src.replace('EMSCRIPTEN$IMPORT$META', 'import.meta')) + save_intermediate('es6-import-meta') # Apply pre and postjs files if options.extern_pre_js or options.extern_post_js: From 43c6f23eeefc0459d2bf240ec6fbb18fe9b6cc04 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Mon, 10 May 2021 15:39:04 +0000 Subject: [PATCH 4/5] Move TARGET_BASENAME & TARGET_JS_NAME to post_link --- emcc.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/emcc.py b/emcc.py index 23e34f7c682c6..c685c8fe1ff35 100755 --- a/emcc.py +++ b/emcc.py @@ -1206,14 +1206,6 @@ def phase_setup(state): else: target = 'a.out.js' - if options.oformat in (OFormat.JS, OFormat.MJS): - js_target = target - else: - js_target = get_secondary_target(target, '.js') - settings.TARGET_JS_NAME = js_target - - settings.TARGET_BASENAME = unsuffixed_basename(target) - if settings.EXTRA_EXPORTED_RUNTIME_METHODS: diagnostics.warning('deprecated', 'EXTRA_EXPORTED_RUNTIME_METHODS is deprecated, please use EXPORTED_RUNTIME_METHODS instead') settings.EXPORTED_RUNTIME_METHODS += settings.EXTRA_EXPORTED_RUNTIME_METHODS @@ -2405,6 +2397,14 @@ def phase_post_link(options, in_wasm, wasm_target, target): if options.oformat != OFormat.WASM: final_js = in_temp(target_basename + '.js') + settings.TARGET_BASENAME = unsuffixed_basename(target) + + if options.oformat in (OFormat.JS, OFormat.MJS): + js_target = target + else: + js_target = get_secondary_target(target, '.js') + settings.TARGET_JS_NAME = js_target + if settings.MEM_INIT_IN_WASM: memfile = None else: From 1338c31483a75760ee47cc96e29cda587606e484 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Mon, 10 May 2021 23:13:41 +0100 
Subject: [PATCH 5/5] Assign TARGET_JS_NAME directly --- emcc.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/emcc.py b/emcc.py index c685c8fe1ff35..4eb747f806950 100755 --- a/emcc.py +++ b/emcc.py @@ -2400,10 +2400,9 @@ def phase_post_link(options, in_wasm, wasm_target, target): settings.TARGET_BASENAME = unsuffixed_basename(target) if options.oformat in (OFormat.JS, OFormat.MJS): - js_target = target + settings.TARGET_JS_NAME = target else: - js_target = get_secondary_target(target, '.js') - settings.TARGET_JS_NAME = js_target + settings.TARGET_JS_NAME = get_secondary_target(target, '.js') if settings.MEM_INIT_IN_WASM: memfile = None