Skip to content

Commit

Permalink
Enable LLD_REPORT_UNDEFINED by default
Browse files Browse the repository at this point in the history
This makes undefined symbol errors more precise by including the name of
the object that references the undefined symbol.

Its also paves the way (in my mind anyway) for finally fixing reverse
dependencies in a salable way.  See #15982.  That PR uses an alternative
script for the pre-processing of dependencies but also fundamentally
relies on processing JS libraries both before and after linking.

The cost is about 300ms per link operation due to double processing of
the JS libraries.  This cost is fixed for most projects (since most
project don't add a lot JS libraries over time in the way that they add
native code object).  I imagine even in the most pathological cases JS
libraries usage will be dwarfed by native object file usage so even in
those cases the native linking will likely always dominate the link
time.

If the 300ms extra link time causes issues, for example with cmake or
autoconf, that do a lot linking of small programs, we could consider
hashing the config setting and caching the result of the processing
based on them.
  • Loading branch information
sbc100 committed Dec 6, 2022
1 parent f5c82d5 commit 30c3a36
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 50 deletions.
5 changes: 5 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ See docs/process.md for more on how version tagging works.

3.1.28 (in development)
-----------------------
- `LLD_REPORT_UNDEFINED` is now enabled by default. This makes undefined symbol
errors more precise by including the name of the object that references the
undefined symbol. The old behaviour (of allowing all undefined symbols at
wasm-ld time and reporting them later when processing JS library files) is
still available using `-sLLD_REPORT_UNDEFINED=0`. (#16003)
- musl libc updated from v1.2.2 to v1.2.3. (#18270)
- The default emscripten config file no longer contains `EMSCRIPTEN_ROOT`. This
setting has long been completely ignored by emscripten itself. For
Expand Down
8 changes: 3 additions & 5 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def generate_js_symbols():
def get_all_js_syms():
# Avoiding using the cache when generating struct info since
# this step is performed while the cache is locked.
if settings.BOOTSTRAPPING_STRUCT_INFO:
if settings.BOOTSTRAPPING_STRUCT_INFO or config.FROZEN_CACHE:
return generate_js_symbols()

# To avoid the cost of calling generate_js_symbols each time an executable is
Expand Down Expand Up @@ -1871,6 +1871,7 @@ def phase_linker_setup(options, state, newargs):
if 'EXPORTED_FUNCTIONS' in user_settings:
if '_main' not in settings.USER_EXPORTED_FUNCTIONS:
settings.EXPECT_MAIN = 0
settings.IGNORE_MISSING_MAIN = 1
else:
assert not settings.EXPORTED_FUNCTIONS
settings.EXPORTED_FUNCTIONS = ['_main']
Expand Down Expand Up @@ -1899,10 +1900,7 @@ def phase_linker_setup(options, state, newargs):
if not settings.PURE_WASI and '-nostdlib' not in newargs and '-nodefaultlibs' not in newargs:
default_setting('STACK_OVERFLOW_CHECK', max(settings.ASSERTIONS, settings.STACK_OVERFLOW_CHECK))

if settings.LLD_REPORT_UNDEFINED or settings.STANDALONE_WASM:
# Reporting undefined symbols at wasm-ld time requires us to know if we have a `main` function
# or not, as does standalone wasm mode.
# TODO(sbc): Remove this once this becomes the default
if settings.STANDALONE_WASM:
settings.IGNORE_MISSING_MAIN = 0

# For users that opt out of WARN_ON_UNDEFINED_SYMBOLS we assume they also
Expand Down
10 changes: 6 additions & 4 deletions src/library.js
Original file line number Diff line number Diff line change
Expand Up @@ -1258,17 +1258,19 @@ mergeInto(LibraryManager.library, {
// built with SUPPORT_LONGJMP=1, the object file contains references of not
// longjmp but _emscripten_throw_longjmp, which is called from
// emscripten_longjmp.
_emscripten_throw_longjmp: function() { error('longjmp support was disabled (SUPPORT_LONGJMP=0), but it is required by the code (either set SUPPORT_LONGJMP=1, or remove uses of it in the project)'); },
get _emscripten_throw_longjmp__deps() {
return this.longjmp__deps;
},
#endif
_emscripten_throw_longjmp: function() {
error('longjmp support was disabled (SUPPORT_LONGJMP=0), but it is required by the code (either set SUPPORT_LONGJMP=1, or remove uses of it in the project)');
},
// will never be emitted, as the dep errors at compile time
longjmp: function(env, value) {
abort('longjmp not supported');
abort('longjmp not supported (build with -s SUPPORT_LONGJMP)');
},
setjmp: function(env, value) {
abort('setjmp not supported');
setjmp: function(env) {
abort('setjmp not supported (build with -s SUPPORT_LONGJMP)');
},
#endif

Expand Down
11 changes: 6 additions & 5 deletions src/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -1917,12 +1917,13 @@ var USE_OFFSET_CONVERTER = false;
// This is enabled automatically when using -g4 with sanitizers.
var LOAD_SOURCE_MAP = false;

// If set to 1, the JS compiler is run before wasm-ld so that the linker can
// report undefined symbols within the binary. Without this option the linker
// doesn't know which symbols might be defined in JS so reporting of undefined
// symbols is delayed until the JS compiler is run.
// If set to 0, delay undefined symbol report until after wasm-ld runs. This
// avoids running the the JS compiler prior to wasm-ld, but reduces the amount
// of information in the undefined symbol message (Since JS compiler cannot
// report the name of the object file that contains the reference to the
// undefined symbol).
// [link]
var LLD_REPORT_UNDEFINED = false;
var LLD_REPORT_UNDEFINED = true;

// Default to c++ mode even when run as `emcc` rather then `emc++`.
// When this is disabled `em++` is required when compiling and linking C++
Expand Down
2 changes: 2 additions & 0 deletions test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,8 @@ def expect_fail(self, cmd, expect_traceback=False, **args):
self.assertContained('Traceback', proc.stderr)
elif not WINDOWS or 'Access is denied' not in proc.stderr:
self.assertNotContained('Traceback', proc.stderr)
if EMTEST_VERBOSE:
sys.stderr.write(proc.stderr)
return proc.stderr

# excercise dynamic linker.
Expand Down
17 changes: 7 additions & 10 deletions test/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4142,8 +4142,8 @@ def test_dylink_basics_no_modify(self):
self.do_basic_dylink_test()

@needs_dylink
def test_dylink_basics_lld_report_undefined(self):
self.set_setting('LLD_REPORT_UNDEFINED')
def test_dylink_basics_no_lld_report_undefined(self):
self.set_setting('LLD_REPORT_UNDEFINED', 0)
self.do_basic_dylink_test()

@needs_dylink
Expand Down Expand Up @@ -5143,9 +5143,6 @@ def test_dylink_rtti(self):
# in the another module.
# Each module will define its own copy of certain COMDAT symbols such as
# each classs's typeinfo, but at runtime they should both use the same one.
# Use LLD_REPORT_UNDEFINED to test that it works as expected with weak/COMDAT
# symbols.
self.set_setting('LLD_REPORT_UNDEFINED')
header = '''
#include <cstddef>
Expand Down Expand Up @@ -6154,7 +6151,6 @@ def test_unistd_io(self):
'nodefs': (['NODEFS']),
})
def test_unistd_misc(self, fs):
self.set_setting('LLD_REPORT_UNDEFINED')
self.emcc_args += ['-D' + fs]
if fs == 'NODEFS':
self.require_node()
Expand Down Expand Up @@ -9407,9 +9403,8 @@ def test_undefined_main(self):
# In standalone we don't support implicitly building without main. The user has to explicitly
# opt out (see below).
err = self.expect_fail([EMCC, test_file('core/test_ctors_no_main.cpp')] + self.get_emcc_args())
self.assertContained('error: undefined symbol: main/__main_argc_argv (referenced by top-level compiled C/C++ code)', err)
self.assertContained('warning: To build in STANDALONE_WASM mode without a main(), use emcc --no-entry', err)
elif not self.get_setting('LLD_REPORT_UNDEFINED') and not self.get_setting('STRICT'):
self.assertContained('undefined symbol: main', err)
elif not self.get_setting('STRICT'):
# Traditionally in emscripten we allow main to be implicitly undefined. This allows programs
# with a main and libraries without a main to be compiled identically.
# However we are trying to move away from that model to a more explicit opt-out model. See:
Expand All @@ -9427,6 +9422,9 @@ def test_undefined_main(self):
self.do_core_test('test_ctors_no_main.cpp')
self.clear_setting('EXPORTED_FUNCTIONS')

# Marked as impure since the WASI reactor modules (modules without main)
# are not yet suppored by the wasm engines we test against.
@also_with_standalone_wasm(impure=True)
def test_undefined_main_explict(self):
# If we pass --no-entry this test should compile without issue
self.emcc_args.append('--no-entry')
Expand Down Expand Up @@ -9699,7 +9697,6 @@ def setUp(self):
settings={'ALLOW_MEMORY_GROWTH': 1})

# Experimental modes (not tested by CI)
lld = make_run('lld', emcc_args=[], settings={'LLD_REPORT_UNDEFINED': 1})
minimal0 = make_run('minimal0', emcc_args=['-g'], settings={'MINIMAL_RUNTIME': 1})

# TestCoreBase is just a shape for the specific subclasses, we don't test it itself
Expand Down
49 changes: 24 additions & 25 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -2167,8 +2167,8 @@ def test_undefined_symbols(self, action):
print(proc.stderr)
if value or action is None:
# The default is that we error in undefined symbols
self.assertContained('error: undefined symbol: something', proc.stderr)
self.assertContained('error: undefined symbol: elsey', proc.stderr)
self.assertContained('undefined symbol: something', proc.stderr)
self.assertContained('undefined symbol: elsey', proc.stderr)
check_success = False
elif action == 'ERROR' and not value:
# Error disables, should only warn
Expand Down Expand Up @@ -3548,7 +3548,7 @@ def test_js_lib_missing_sig(self):
def test_js_lib_quoted_key(self):
create_file('lib.js', r'''
mergeInto(LibraryManager.library, {
__internal_data:{
__internal_data:{
'<' : 0,
'white space' : 1
},
Expand Down Expand Up @@ -6591,7 +6591,7 @@ def test_no_warn_exported_jslibfunc(self):
err = self.expect_fail([EMCC, test_file('hello_world.c'),
'-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=alGetError',
'-sEXPORTED_FUNCTIONS=_main,_alGet'])
self.assertContained('undefined exported symbol: "_alGet"', err)
self.assertContained('error: undefined exported symbol: "_alGet" [-Wundefined] [-Werror]', err)

def test_musl_syscalls(self):
self.run_process([EMCC, test_file('hello_world.c')])
Expand Down Expand Up @@ -8361,7 +8361,7 @@ def test_full_js_library(self):
def test_full_js_library_undefined(self):
create_file('main.c', 'void foo(); int main() { foo(); return 0; }')
err = self.expect_fail([EMCC, 'main.c', '-sSTRICT_JS', '-sINCLUDE_FULL_LIBRARY'])
self.assertContained('error: undefined symbol: foo', err)
self.assertContained('undefined symbol: foo', err)

def test_full_js_library_except(self):
self.set_setting('INCLUDE_FULL_LIBRARY', 1)
Expand Down Expand Up @@ -9017,19 +9017,20 @@ def test_js_preprocess(self):

err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js'], stderr=PIPE).stderr
self.assertContained('JSLIB: none of the above', err)
self.assertEqual(err.count('JSLIB'), 1)
self.assertNotContained('JSLIB: MAIN_MODULE', err)
self.assertNotContained('JSLIB: EXIT_RUNTIME', err)

err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE'], stderr=PIPE).stderr
self.assertContained('JSLIB: MAIN_MODULE=1', err)
self.assertEqual(err.count('JSLIB'), 1)
self.assertNotContained('JSLIB: EXIT_RUNTIME', err)

err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE=2'], stderr=PIPE).stderr
self.assertContained('JSLIB: MAIN_MODULE=2', err)
self.assertEqual(err.count('JSLIB'), 1)
self.assertNotContained('JSLIB: EXIT_RUNTIME', err)

err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sEXIT_RUNTIME'], stderr=PIPE).stderr
self.assertContained('JSLIB: EXIT_RUNTIME', err)
self.assertEqual(err.count('JSLIB'), 1)
self.assertNotContained('JSLIB: MAIN_MODULE', err)

def test_html_preprocess(self):
src_file = test_file('module/test_stdin.c')
Expand Down Expand Up @@ -9202,7 +9203,7 @@ def test_dash_s_list_parsing(self):
# stray slash
('EXPORTED_FUNCTIONS=["_a", "_b",\\ "_c", "_d"]', 'undefined exported symbol: "\\\\ "_c"'),
# missing comma
('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'undefined exported symbol: "_b" "_c"'),
('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'emcc: error: undefined exported symbol: "_b" "_c" [-Wundefined] [-Werror]'),
]:
print(export_arg)
proc = self.run_process([EMCC, 'src.c', '-s', export_arg], stdout=PIPE, stderr=PIPE, check=not expected)
Expand Down Expand Up @@ -10887,20 +10888,20 @@ def test_signature_mismatch(self):
self.expect_fail([EMCC, '-Wl,--fatal-warnings', 'a.c', 'b.c'])
self.expect_fail([EMCC, '-sSTRICT', 'a.c', 'b.c'])

# TODO(sbc): Remove these tests once we remove the LLD_REPORT_UNDEFINED
def test_lld_report_undefined(self):
create_file('main.c', 'void foo(); int main() { foo(); return 0; }')
stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED', 'main.c'])
self.assertContained('wasm-ld: error:', stderr)
self.assertContained('main_0.o: undefined symbol: foo', stderr)
stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED=0', 'main.c'])
self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', stderr)

def test_lld_report_undefined_reverse_deps(self):
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sREVERSE_DEPS=all', test_file('hello_world.c')])
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sREVERSE_DEPS=all', test_file('hello_world.c')])

def test_lld_report_undefined_exceptions(self):
self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED', '-fwasm-exceptions', test_file('hello_libcxx.cpp')])
self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED=0', '-fwasm-exceptions', test_file('hello_libcxx.cpp')])

def test_lld_report_undefined_main_module(self):
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sMAIN_MODULE=2', test_file('hello_world.c')])
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sMAIN_MODULE=2', test_file('hello_world.c')])

# Verifies that warning messages that Closure outputs are recorded to console
def test_closure_warnings(self):
Expand Down Expand Up @@ -11038,14 +11039,12 @@ def test_linker_version(self):
def test_chained_js_error_diagnostics(self):
err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c'), '--js-library', test_file('test_chained_js_error_diagnostics.js')])
self.assertContained("error: undefined symbol: nonexistent_function (referenced by bar__deps: ['nonexistent_function'], referenced by foo__deps: ['bar'], referenced by top-level compiled C/C++ code)", err)
# Check that we don't recommend LLD_REPORT_UNDEFINED for chained dependencies.
self.assertNotContained('LLD_REPORT_UNDEFINED', err)

# Test without chaining. In this case we don't include the JS library at all resulting in `foo`
# being undefined in the native code and in this case we recommend LLD_REPORT_UNDEFINED.
# Test without chaining. In this case we don't include the JS library at
# all resulting in `foo` being undefined in the native code.
err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c')])
self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', err)
self.assertContained('Link with `-sLLD_REPORT_UNDEFINED` to get more information on undefined symbols', err)
self.assertContained('undefined symbol: foo', err)
self.assertNotContained('referenced by top-level compiled C/C++ code', err)

def test_xclang_flag(self):
create_file('foo.h', ' ')
Expand Down Expand Up @@ -11483,7 +11482,7 @@ def test_split_main_module(self):

self.run_process([EMCC, side_src, '-sSIDE_MODULE', '-g', '-o', 'libhello.wasm'])

self.emcc_args += ['-g']
self.emcc_args += ['-g', 'libhello.wasm']
self.emcc_args += ['-sMAIN_MODULE=2']
self.emcc_args += ['-sEXPORTED_FUNCTIONS=_printf']
self.emcc_args += ['-sSPLIT_MODULE', '-Wno-experimental']
Expand Down Expand Up @@ -11847,7 +11846,7 @@ def test_no_main_with_PROXY_TO_PTHREAD(self):
void foo() {}
''')
err = self.expect_fail([EMCC, 'lib.cpp', '-pthread', '-sPROXY_TO_PTHREAD'])
self.assertContained('error: PROXY_TO_PTHREAD proxies main() for you, but no main exists', err)
self.assertContained('crt1_proxy_main.o: undefined symbol: main', err)

def test_archive_bad_extension(self):
# Regression test for https://github.com/emscripten-core/emscripten/issues/14012
Expand Down Expand Up @@ -11889,7 +11888,7 @@ def test_unimplemented_syscalls(self, args):
cmd = [EMCC, 'main.c', '-sASSERTIONS'] + args
if args:
err = self.expect_fail(cmd)
self.assertContained('error: undefined symbol: __syscall_mincore', err)
self.assertContained('undefined symbol: __syscall_mincore', err)
else:
self.run_process(cmd)
err = self.run_js('a.out.js')
Expand Down
1 change: 0 additions & 1 deletion tools/gen_struct_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,6 @@ def inspect_headers(headers, cflags):
'-nostdlib',
compiler_rt,
'-sBOOTSTRAPPING_STRUCT_INFO',
'-sLLD_REPORT_UNDEFINED',
'-sSTRICT',
'-sASSERTIONS=0',
# Use SINGLE_FILE so there is only a single
Expand Down

0 comments on commit 30c3a36

Please sign in to comment.