emscripten-core · kripken · Jan 14, 2022 · Nov 1, 2021 · Nov 1, 2021 · Nov 1, 2021
diff --git a/ChangeLog.md b/ChangeLog.md
@@ -20,6 +20,11 @@ See docs/process.md for more on how version tagging works.
 
 3.1.2
 -----
+- `EVAL_CTORS` has been rewritten and improved. It is now much more capable: it
+  can eval parts of functions rather than just all or nothing, and it can eval
+  more wasm constructs, such as globals. It is also no longer run by default,
+  so to use it you should build with `-s EVAL_CTORS`. See `settings.js` for
+  more details. (#16011)
 - `wasmX` test suites that are defined in `test_core.py` have been renamed to
   `coreX` to better reflect where they are defined.  The old suite names such
   as `wasm2` will continue to work for now as aliases.

diff --git a/emcc.py b/emcc.py
@@ -1440,8 +1440,6 @@ def default_setting(name, new_default):
 
   if settings.OPT_LEVEL >= 1:
     default_setting('ASSERTIONS', 0)
-  if settings.SHRINK_LEVEL >= 2:
-    default_setting('EVAL_CTORS', 1)
 
   if options.emrun:
     options.pre_js.append(utils.path_from_root('src/emrun_prejs.js'))
@@ -2165,6 +2163,19 @@ def check_memory_setting(setting):
   if settings.SINGLE_FILE:
     settings.GENERATE_SOURCE_MAP = 0
 
+  if settings.EVAL_CTORS:
+    if settings.WASM2JS:
+      # TODO: resolve code size/memory and correctness issues with wasm2js
+      exit_with_error('EVAL_CTORS is not compatible with wasm2js yet')
+    elif settings.USE_PTHREADS:
+      exit_with_error('EVAL_CTORS is not compatible with pthreads yet (passive segments)')
+    elif settings.RELOCATABLE:
+      exit_with_error('EVAL_CTORS is not compatible with relocatable yet (movable segments)')
+    elif settings.ASYNCIFY:
+      # In Asyncify exports can be called more than once, and this seems to not
+      # work properly yet (see test_emscripten_scan_registers).
+      exit_with_error('EVAL_CTORS is not compatible with asyncify yet')
+
   if options.use_closure_compiler == 2 and not settings.WASM2JS:
     exit_with_error('closure compiler mode 2 assumes the code is asm.js, so not meaningful for wasm')
 

diff --git a/emscripten.py b/emscripten.py
@@ -35,14 +35,12 @@
 
 logger = logging.getLogger('emscripten')
 
-WASM_INIT_FUNC = '__wasm_call_ctors'
-
 
 def compute_minimal_runtime_initializer_and_exports(post, exports, receiving):
   # Declare all exports out to global JS scope so that JS library functions can access them in a
   # way that minifies well with Closure
   # e.g. var a,b,c,d,e,f;
-  exports_that_are_not_initializers = [x for x in exports if x not in WASM_INIT_FUNC]
+  exports_that_are_not_initializers = [x for x in exports if x not in building.WASM_CALL_CTORS]
   # In Wasm backend the exports are still unmangled at this point, so mangle the names here
   exports_that_are_not_initializers = [asmjs_mangle(x) for x in exports_that_are_not_initializers]
 
@@ -780,7 +778,7 @@ def create_receiving(exports):
   if not settings.DECLARE_ASM_MODULE_EXPORTS:
     return ''
 
-  exports_that_are_not_initializers = [x for x in exports if x != WASM_INIT_FUNC]
+  exports_that_are_not_initializers = [x for x in exports if x != building.WASM_CALL_CTORS]
 
   receiving = []
 

diff --git a/site/source/docs/optimizing/Optimizing-Code.rst b/site/source/docs/optimizing/Optimizing-Code.rst
@@ -119,6 +119,59 @@ linker can handle a mix wasm object files and LTO object files.  Passing
 Thus, to allow maximal LTO opportunities with the LLVM wasm backend, build all
 source files with ``-flto`` and also link with ``flto``.
 
+EVAL_CTORS
+==========
+
+Building with ``-sEVAL_CTORS`` will evaluate as much code as possible at
+compile time. That includes both the "global ctor" functions (functions LLVM
+emits that run before ``main()``) as well as ``main()`` itself. As much as can
+be evaluated will be, and the resulting state is then "snapshotted" into the
+wasm. Then when the program is run it will begin from that state, and not need
+to execute that code, which can save time.
+
+This optimization can either reduce or increase code size. If a small amount
+of code generates many changes in memory, for example, then overall size may
+increase. It is best to build with this flag and then measure code and startup
+speed and see if the tradeoff is worthwhile in your program.
+
+You can make an effort to write EVAL_CTORS-friendly code, by deferring things
+that cannot be evalled as much as possible. For example, calls to imports stop
+this optimization, and so if you have a game engine that creates a GL context
+and then does some pure computation to set up unrelated data structures in
+memory, then you could reverse that order. Then the pure computation could run
+first, and be evalled away, and the GL context creation call to an import would
+not prevent that. Other things you can do are to avoid using ``argc/argv``, to
+avoid using ``getenv()``, and so forth.
+
+Logging is shown when using this option so that you can see whether things can
+be improved. Here is an example of output from ``emcc -sEVAL_CTORS``:
+
+::
+
+  trying to eval __wasm_call_ctors
+    ...partial evalling successful, but stopping since could not eval: call import: wasi_snapshot_preview1.environ_sizes_get
+         recommendation: consider --ignore-external-input
+    ...stopping
+
+The first line indicates an attempt to eval LLVM's function that runs global
+ctors. It evalled some of the function but then stopped at the WASI import
+``environ_sizes_get``, which means the code is trying to read the environment.
+As the output says, you can tell ``EVAL_CTORS`` to ignore external input, which
+makes it assume there is none. You can enable that with mode ``2``, that is,
+build with ``emcc -sEVAL_CTORS=2``:
+
+::
+
+  trying to eval __wasm_call_ctors
+    ...success on __wasm_call_ctors.
+  trying to eval main
+    ...stopping (in block) since could not eval: call import: wasi_snapshot_preview1.fd_write
+    ...stopping
+
+Now it has succeeded in evalling ``__wasm_call_ctors`` completely. It then
+moved on to ``main``, where it stopped because of a call to WASI's
+``fd_write``, that is, a call to print something.
+
 Very large codebases
 ====================
 

diff --git a/src/settings.js b/src/settings.js
@@ -1535,40 +1535,40 @@ var ALLOW_BLOCKING_ON_MAIN_THREAD = 1;
 // [link]
 var PTHREADS_DEBUG = 0;
 
-// This tries to evaluate global ctors at compile-time, applying their effects
-// into the mem init file. This saves running code during startup, and also
-// allows removing the global ctor functions and other code that only they used,
-// so this is also good for reducing code size. However, this does make the
-// compile step much slower.
-//
-// This basically runs the ctors during compile time, seeing if they execute
-// safely in a sandbox. Any ffi access out of wasm causes failure, as it could
-// do something nondeterministic and/or alter some other state we don't see. If
-// all the global ctor does is pure computation inside wasm, it should be ok.
-// Run with EMCC_DEBUG=1 in the env to see logging, and errors when it fails to
-// eval (you'll see a message, or a stack trace; in the latter case, the
-// functions on the stack should give you an idea of what ffi was called and
-// why, and perhaps you can refactor your code to avoid it, e.g., remove
-// mallocs, printfs in global ctors).
-//
-// This optimization can increase the size of the mem init file, because ctors
-// can write to memory that would otherwise be in a zeroinit area. This may not
-// be a significant increase after gzip, if there are mostly zeros in there, and
-// in any case the mem init increase would be offset by a code size decrease.
-// (Unless you have a small ctor that writes 'random' data to memory, which
-// would reduce little code but add potentially lots of uncompressible data.)
+// This tries to evaluate code at compile time. The main use case is to eval
+// global ctor functions, which are those that run before main(), but main()
+// itself or parts of it can also be evalled. Evaluating code this way can avoid
+// work at runtime, as it applies the results of the execution to memory and
+// globals and so forth, "snapshotting" the wasm and then just running it from
+// there when it is loaded.
+//
+// This will stop when it sees something it cannot eval at compile time, like a
+// call to an import. When running with this option you will see logging that
+// indicates what is evalled and where it stops.
+//
+// This optimization can either reduce or increase code size. If a small amount
+// of code generates many changes in memory, for example, then overall size may
+// increase.
 //
 // LLVM's GlobalOpt *almost* does this operation. It does in simple cases, where
 // LLVM IR is not too complex for its logic to evaluate, but it isn't powerful
 // enough for e.g. libc++ iostream ctors. It is just hard to do at the LLVM IR
-// level - LLVM IR is complex and getting more complex, this would require
+// level - LLVM IR is complex and getting more complex, so this would require
 // GlobalOpt to have a full interpreter, plus a way to write back into LLVM IR
 // global objects.  At the wasm level, however, everything has been lowered
 // into a simple low level, and we also just need to write bytes into an array,
-// so this is easy for us to do, but not for LLVM. A further issue for LLVM is
-// that it doesn't know that we will not link in further code, so it only tries
-// to optimize ctors with lowest priority. We do know that, and can optimize all
-// the ctors.
+// so this is easy for us to do. A further issue for LLVM is that it doesn't
+// know that we will not link in further code, so it only tries to optimize
+// ctors with lowest priority (while we do know explicitly if dynamic linking is
+// enabled or not).
+//
+// If set to a value of 2, this also makes some "unsafe" assumptions,
+// specifically that there is no input received while evalling ctors. That means
+// we ignore args to main() as well as assume no environment vars are readable.
+// This allows more programs to be optimized, but you need to make sure your
+// program does not depend on those features - even just checking the value of
+// argc can lead to problems.
+//
 // [link]
 var EVAL_CTORS = 0;
 

diff --git a/tests/common.py b/tests/common.py
@@ -642,6 +642,11 @@ def is_exported_in_wasm(self, name, wasm):
     wat = self.get_wasm_text(wasm)
     return ('(export "%s"' % name) in wat
 
+  def measure_wasm_code_lines(self, wasm):  # count lines of the wasm text, ignoring data lines
+    wat_lines = self.get_wasm_text(wasm).splitlines()  # text form of `wasm`, one entry per line
+    non_data_lines = [line for line in wat_lines if '(data ' not in line]  # drop any line containing '(data '
+    return len(non_data_lines)
+
   def run_js(self, filename, engine=None, args=[],
              output_nicerizer=None,
              assert_returncode=0,

diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.exports b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.exports
@@ -0,0 +1,13 @@
+__errno_location
+__indirect_function_table
+__wasm_call_ctors
+dynCall_iiiiiijj
+dynCall_iiiiij
+dynCall_iiiiijj
+dynCall_jiji
+dynCall_viijii
+main
+memory
+stackAlloc
+stackRestore
+stackSave
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.imports b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.imports
@@ -0,0 +1,11 @@
+env.abort
+env.emscripten_memcpy_big
+env.emscripten_resize_heap
+env.setTempRet0
+env.strftime_l
+wasi_snapshot_preview1.environ_get
+wasi_snapshot_preview1.environ_sizes_get
+wasi_snapshot_preview1.fd_close
+wasi_snapshot_preview1.fd_read
+wasi_snapshot_preview1.fd_seek
+wasi_snapshot_preview1.fd_write
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.jssize b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.jssize
@@ -0,0 +1 @@
+98089
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.sent b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.sent
@@ -0,0 +1,11 @@
+abort
+emscripten_memcpy_big
+emscripten_resize_heap
+environ_get
+environ_sizes_get
+fd_close
+fd_read
+fd_seek
+fd_write
+setTempRet0
+strftime_l
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.size b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS.size
@@ -0,0 +1 @@
+124645
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.exports b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.exports
@@ -0,0 +1,12 @@
+__errno_location
+__indirect_function_table
+dynCall_iiiiiijj
+dynCall_iiiiij
+dynCall_iiiiijj
+dynCall_jiji
+dynCall_viijii
+main
+memory
+stackAlloc
+stackRestore
+stackSave
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.imports b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.imports
@@ -0,0 +1,9 @@
+env.abort
+env.emscripten_memcpy_big
+env.emscripten_resize_heap
+env.setTempRet0
+env.strftime_l
+wasi_snapshot_preview1.fd_close
+wasi_snapshot_preview1.fd_read
+wasi_snapshot_preview1.fd_seek
+wasi_snapshot_preview1.fd_write
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.jssize b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.jssize
@@ -0,0 +1 @@
+97987
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.sent b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.sent
@@ -0,0 +1,11 @@
+abort
+emscripten_memcpy_big
+emscripten_resize_heap
+environ_get
+environ_sizes_get
+fd_close
+fd_read
+fd_seek
+fd_write
+setTempRet0
+strftime_l
diff --git a/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.size b/tests/other/metadce/hello_libcxx_O2_EVAL_CTORS_2.size
@@ -0,0 +1 @@
+122060
diff --git a/tests/other/metadce/minimal_Oz_EVAL_CTORS.exports b/tests/other/metadce/minimal_Oz_EVAL_CTORS.exports
@@ -0,0 +1,3 @@
+a
+b
+c
diff --git a/tests/other/metadce/minimal_Oz_EVAL_CTORS.funcs b/tests/other/metadce/minimal_Oz_EVAL_CTORS.funcs
@@ -0,0 +1 @@
+$add
diff --git a/tests/other/metadce/minimal_Oz_EVAL_CTORS.imports b/tests/other/metadce/minimal_Oz_EVAL_CTORS.imports
@@ -0,0 +1 @@
+
diff --git a/tests/other/metadce/minimal_Oz_EVAL_CTORS.jssize b/tests/other/metadce/minimal_Oz_EVAL_CTORS.jssize
@@ -0,0 +1 @@
+11845
diff --git a/tests/other/metadce/minimal_Oz_EVAL_CTORS.sent b/tests/other/metadce/minimal_Oz_EVAL_CTORS.sent
@@ -0,0 +1 @@
+
diff --git a/tests/other/metadce/minimal_Oz_EVAL_CTORS.size b/tests/other/metadce/minimal_Oz_EVAL_CTORS.size
@@ -0,0 +1 @@
+62
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
@@ -379,6 +379,7 @@ def cleanup(self):
     benchmarkers += [
       EmscriptenBenchmarker(default_v8_name, aot_v8),
       EmscriptenBenchmarker(default_v8_name + '-lto', aot_v8, ['-flto']),
+      EmscriptenBenchmarker(default_v8_name + '-ctors', aot_v8, ['-sEVAL_CTORS']),
       # EmscriptenWasm2CBenchmarker('wasm2c')
     ]
   if os.path.exists(CHEERP_BIN):
-Original file line number
+Diff line change
@@ -0,0 +1,3 @@
+    a
+    b
+    c