From f11d6196dd4e8748a726f19895c859b40ff6a4f3 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 8 Dec 2022 11:15:20 -0800 Subject: [PATCH] Enable LLD_REPORT_UNDEFINED by default (#16003) This makes undefined symbol errors more precise by including the name of the object that references the undefined symbol. Its also paves the way (in my mind anyway) for finally fixing reverse dependencies in a salable way. See #15982. That PR uses an alternative script for the pre-processing of dependencies but also fundamentally relies on processing JS libraries both before and after linking. The cost is about 300ms per link operation due to double processing of the JS libraries, but results are cached so in practice this only happens the first time a given link command is run (see #18326). --- ChangeLog.md | 5 +++++ emcc.py | 10 +++------ src/settings.js | 11 +++++----- test/test_core.py | 17 ++++++--------- test/test_other.py | 47 ++++++++++++++++++++-------------------- tools/gen_struct_info.py | 1 - 6 files changed, 44 insertions(+), 47 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index e21af0489463..f263608549f7 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,11 @@ See docs/process.md for more on how version tagging works. 3.1.28 (in development) ----------------------- +- `LLD_REPORT_UNDEFINED` is now enabled by default. This makes undefined symbol + errors more precise by including the name of the object that references the + undefined symbol. The old behaviour (of allowing all undefined symbols at + wasm-ld time and reporting them later when processing JS library files) is + still available using `-sLLD_REPORT_UNDEFINED=0`. (#16003) - musl libc updated from v1.2.2 to v1.2.3. (#18270) - The default emscripten config file no longer contains `EMSCRIPTEN_ROOT`. This setting has long been completely ignored by emscripten itself. For diff --git a/emcc.py b/emcc.py index e7182bc12e31..010d37a4b3f5 100755 --- a/emcc.py +++ b/emcc.py @@ -553,9 +553,8 @@ def build_symbol_list(filename): library_syms = read_file(filename).splitlines() # Limit of the overall size of the cache to 100 files. - # This code will get test coverage once we make LLD_REPORT_UNDEFINED the default - # since under those circumstances a full test run of `other` or `core` generates - # ~1000 unique symbol lists. + # This code will get test coverage since a full test run of `other` or `core` + # generates ~1000 unique symbol lists. cache_limit = 100 root = cache.get_path('symbol_lists') if len(os.listdir(root)) > cache_limit: @@ -1898,10 +1897,7 @@ def phase_linker_setup(options, state, newargs): if not settings.PURE_WASI and '-nostdlib' not in newargs and '-nodefaultlibs' not in newargs: default_setting('STACK_OVERFLOW_CHECK', max(settings.ASSERTIONS, settings.STACK_OVERFLOW_CHECK)) - if settings.LLD_REPORT_UNDEFINED or settings.STANDALONE_WASM: - # Reporting undefined symbols at wasm-ld time requires us to know if we have a `main` function - # or not, as does standalone wasm mode. - # TODO(sbc): Remove this once this becomes the default + if settings.STANDALONE_WASM: settings.IGNORE_MISSING_MAIN = 0 # For users that opt out of WARN_ON_UNDEFINED_SYMBOLS we assume they also diff --git a/src/settings.js b/src/settings.js index 9ec1915fc230..5aea30cd1343 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1917,12 +1917,13 @@ var USE_OFFSET_CONVERTER = false; // This is enabled automatically when using -g4 with sanitizers. var LOAD_SOURCE_MAP = false; -// If set to 1, the JS compiler is run before wasm-ld so that the linker can -// report undefined symbols within the binary. Without this option the linker -// doesn't know which symbols might be defined in JS so reporting of undefined -// symbols is delayed until the JS compiler is run. +// If set to 0, delay undefined symbol report until after wasm-ld runs. This +// avoids running the the JS compiler prior to wasm-ld, but reduces the amount +// of information in the undefined symbol message (Since JS compiler cannot +// report the name of the object file that contains the reference to the +// undefined symbol). // [link] -var LLD_REPORT_UNDEFINED = false; +var LLD_REPORT_UNDEFINED = true; // Default to c++ mode even when run as `emcc` rather then `emc++`. // When this is disabled `em++` is required when compiling and linking C++ diff --git a/test/test_core.py b/test/test_core.py index 09f127e9fe74..313e26ebb2e8 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -4149,8 +4149,8 @@ def test_dylink_basics_no_modify(self): self.do_basic_dylink_test() @needs_dylink - def test_dylink_basics_lld_report_undefined(self): - self.set_setting('LLD_REPORT_UNDEFINED') + def test_dylink_basics_no_lld_report_undefined(self): + self.set_setting('LLD_REPORT_UNDEFINED', 0) self.do_basic_dylink_test() @needs_dylink @@ -5150,9 +5150,6 @@ def test_dylink_rtti(self): # in the another module. # Each module will define its own copy of certain COMDAT symbols such as # each classs's typeinfo, but at runtime they should both use the same one. - # Use LLD_REPORT_UNDEFINED to test that it works as expected with weak/COMDAT - # symbols. - self.set_setting('LLD_REPORT_UNDEFINED') header = ''' #include @@ -6161,7 +6158,6 @@ def test_unistd_io(self): 'nodefs': (['NODEFS']), }) def test_unistd_misc(self, fs): - self.set_setting('LLD_REPORT_UNDEFINED') self.emcc_args += ['-D' + fs] if fs == 'NODEFS': self.require_node() @@ -9416,9 +9412,8 @@ def test_undefined_main(self): # In standalone we don't support implicitly building without main. The user has to explicitly # opt out (see below). err = self.expect_fail([EMCC, test_file('core/test_ctors_no_main.cpp')] + self.get_emcc_args()) - self.assertContained('error: undefined symbol: main/__main_argc_argv (referenced by top-level compiled C/C++ code)', err) - self.assertContained('warning: To build in STANDALONE_WASM mode without a main(), use emcc --no-entry', err) - elif not self.get_setting('LLD_REPORT_UNDEFINED') and not self.get_setting('STRICT'): + self.assertContained('undefined symbol: main', err) + elif not self.get_setting('STRICT'): # Traditionally in emscripten we allow main to be implicitly undefined. This allows programs # with a main and libraries without a main to be compiled identically. # However we are trying to move away from that model to a more explicit opt-out model. See: @@ -9436,6 +9431,9 @@ def test_undefined_main(self): self.do_core_test('test_ctors_no_main.cpp') self.clear_setting('EXPORTED_FUNCTIONS') + # Marked as impure since the WASI reactor modules (modules without main) + # are not yet suppored by the wasm engines we test against. + @also_with_standalone_wasm(impure=True) def test_undefined_main_explict(self): # If we pass --no-entry this test should compile without issue self.emcc_args.append('--no-entry') @@ -9708,7 +9706,6 @@ def setUp(self): settings={'ALLOW_MEMORY_GROWTH': 1}) # Experimental modes (not tested by CI) -lld = make_run('lld', emcc_args=[], settings={'LLD_REPORT_UNDEFINED': 1}) minimal0 = make_run('minimal0', emcc_args=['-g'], settings={'MINIMAL_RUNTIME': 1}) # TestCoreBase is just a shape for the specific subclasses, we don't test it itself diff --git a/test/test_other.py b/test/test_other.py index 94229d3a70fc..f88e1a28db6f 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -2167,8 +2167,8 @@ def test_undefined_symbols(self, action): print(proc.stderr) if value or action is None: # The default is that we error in undefined symbols - self.assertContained('error: undefined symbol: something', proc.stderr) - self.assertContained('error: undefined symbol: elsey', proc.stderr) + self.assertContained('undefined symbol: something', proc.stderr) + self.assertContained('undefined symbol: elsey', proc.stderr) check_success = False elif action == 'ERROR' and not value: # Error disables, should only warn @@ -3548,7 +3548,7 @@ def test_js_lib_missing_sig(self): def test_js_lib_quoted_key(self): create_file('lib.js', r''' mergeInto(LibraryManager.library, { - __internal_data:{ + __internal_data:{ '<' : 0, 'white space' : 1 }, @@ -6591,7 +6591,7 @@ def test_no_warn_exported_jslibfunc(self): err = self.expect_fail([EMCC, test_file('hello_world.c'), '-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=alGetError', '-sEXPORTED_FUNCTIONS=_main,_alGet']) - self.assertContained('undefined exported symbol: "_alGet"', err) + self.assertContained('error: undefined exported symbol: "_alGet" [-Wundefined] [-Werror]', err) def test_musl_syscalls(self): self.run_process([EMCC, test_file('hello_world.c')]) @@ -8361,7 +8361,7 @@ def test_full_js_library(self): def test_full_js_library_undefined(self): create_file('main.c', 'void foo(); int main() { foo(); return 0; }') err = self.expect_fail([EMCC, 'main.c', '-sSTRICT_JS', '-sINCLUDE_FULL_LIBRARY']) - self.assertContained('error: undefined symbol: foo', err) + self.assertContained('undefined symbol: foo', err) def test_full_js_library_except(self): self.set_setting('INCLUDE_FULL_LIBRARY', 1) @@ -9017,19 +9017,20 @@ def test_js_preprocess(self): err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js'], stderr=PIPE).stderr self.assertContained('JSLIB: none of the above', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: MAIN_MODULE', err) + self.assertNotContained('JSLIB: EXIT_RUNTIME', err) err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE'], stderr=PIPE).stderr self.assertContained('JSLIB: MAIN_MODULE=1', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: EXIT_RUNTIME', err) err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE=2'], stderr=PIPE).stderr self.assertContained('JSLIB: MAIN_MODULE=2', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: EXIT_RUNTIME', err) err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sEXIT_RUNTIME'], stderr=PIPE).stderr self.assertContained('JSLIB: EXIT_RUNTIME', err) - self.assertEqual(err.count('JSLIB'), 1) + self.assertNotContained('JSLIB: MAIN_MODULE', err) def test_html_preprocess(self): src_file = test_file('module/test_stdin.c') @@ -9202,7 +9203,7 @@ def test_dash_s_list_parsing(self): # stray slash ('EXPORTED_FUNCTIONS=["_a", "_b",\\ "_c", "_d"]', 'undefined exported symbol: "\\\\ "_c"'), # missing comma - ('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'undefined exported symbol: "_b" "_c"'), + ('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'emcc: error: undefined exported symbol: "_b" "_c" [-Wundefined] [-Werror]'), ]: print(export_arg) proc = self.run_process([EMCC, 'src.c', '-s', export_arg], stdout=PIPE, stderr=PIPE, check=not expected) @@ -10886,20 +10887,20 @@ def test_signature_mismatch(self): self.expect_fail([EMCC, '-Wl,--fatal-warnings', 'a.c', 'b.c']) self.expect_fail([EMCC, '-sSTRICT', 'a.c', 'b.c']) + # TODO(sbc): Remove these tests once we remove the LLD_REPORT_UNDEFINED def test_lld_report_undefined(self): create_file('main.c', 'void foo(); int main() { foo(); return 0; }') - stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED', 'main.c']) - self.assertContained('wasm-ld: error:', stderr) - self.assertContained('main_0.o: undefined symbol: foo', stderr) + stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED=0', 'main.c']) + self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', stderr) def test_lld_report_undefined_reverse_deps(self): - self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sREVERSE_DEPS=all', test_file('hello_world.c')]) + self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sREVERSE_DEPS=all', test_file('hello_world.c')]) def test_lld_report_undefined_exceptions(self): - self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED', '-fwasm-exceptions', test_file('hello_libcxx.cpp')]) + self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED=0', '-fwasm-exceptions', test_file('hello_libcxx.cpp')]) def test_lld_report_undefined_main_module(self): - self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sMAIN_MODULE=2', test_file('hello_world.c')]) + self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sMAIN_MODULE=2', test_file('hello_world.c')]) # Verifies that warning messages that Closure outputs are recorded to console def test_closure_warnings(self): @@ -11037,14 +11038,12 @@ def test_linker_version(self): def test_chained_js_error_diagnostics(self): err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c'), '--js-library', test_file('test_chained_js_error_diagnostics.js')]) self.assertContained("error: undefined symbol: nonexistent_function (referenced by bar__deps: ['nonexistent_function'], referenced by foo__deps: ['bar'], referenced by top-level compiled C/C++ code)", err) - # Check that we don't recommend LLD_REPORT_UNDEFINED for chained dependencies. - self.assertNotContained('LLD_REPORT_UNDEFINED', err) - # Test without chaining. In this case we don't include the JS library at all resulting in `foo` - # being undefined in the native code and in this case we recommend LLD_REPORT_UNDEFINED. + # Test without chaining. In this case we don't include the JS library at + # all resulting in `foo` being undefined in the native code. err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c')]) - self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', err) - self.assertContained('Link with `-sLLD_REPORT_UNDEFINED` to get more information on undefined symbols', err) + self.assertContained('undefined symbol: foo', err) + self.assertNotContained('referenced by top-level compiled C/C++ code', err) def test_xclang_flag(self): create_file('foo.h', ' ') @@ -11846,7 +11845,7 @@ def test_no_main_with_PROXY_TO_PTHREAD(self): void foo() {} ''') err = self.expect_fail([EMCC, 'lib.cpp', '-pthread', '-sPROXY_TO_PTHREAD']) - self.assertContained('error: PROXY_TO_PTHREAD proxies main() for you, but no main exists', err) + self.assertContained('crt1_proxy_main.o: undefined symbol: main', err) def test_archive_bad_extension(self): # Regression test for https://github.com/emscripten-core/emscripten/issues/14012 @@ -11888,7 +11887,7 @@ def test_unimplemented_syscalls(self, args): cmd = [EMCC, 'main.c', '-sASSERTIONS'] + args if args: err = self.expect_fail(cmd) - self.assertContained('error: undefined symbol: __syscall_mincore', err) + self.assertContained('libc-debug.a(mincore.o): undefined symbol: __syscall_mincore', err) else: self.run_process(cmd) err = self.run_js('a.out.js') diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py index 2062db5b6ab3..12d1da198cb9 100755 --- a/tools/gen_struct_info.py +++ b/tools/gen_struct_info.py @@ -269,7 +269,6 @@ def inspect_headers(headers, cflags): '-nostdlib', compiler_rt, '-sBOOTSTRAPPING_STRUCT_INFO', - '-sLLD_REPORT_UNDEFINED', '-sSTRICT', '-sASSERTIONS=0', # Use SINGLE_FILE so there is only a single