Skip to content

Commit

Permalink
Remove deps_info system and the running of llvm-nm on input file. NFC (
Browse files Browse the repository at this point in the history
…#18905)

This uses a new "stub object" construct to tell the linker (wasm-ld)
not only about the existence of the JS library symbols but the native
symbols on which they depend (a.k.a reverse dependencies).

This allows us to completely remove deps_info.py in favor of just using
normal `__deps` entries in the library files.  It also means we no
longer need to run `llvm-nm` on the linker inputs to discover the
symbols they use.

Depends on: https://reviews.llvm.org/D145308

Fixes: #18875
  • Loading branch information
sbc100 authored Apr 18, 2023
1 parent 8fc85aa commit 1e7b78f
Show file tree
Hide file tree
Showing 24 changed files with 205 additions and 547 deletions.
7 changes: 7 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ See docs/process.md for more on how version tagging works.

3.1.37 (in development)
-----------------------
- The old reverse dependency system based on `tools/deps_info.py` has been
removed and the existing `__deps` entries in JS library files can now be used
to express JS-to-native dependencies. As well as being more precise, and
extensible via user-supplied JS libraries, this also speeds up link times
since we no longer need to scan linker inputs using `llvm-nm`. It also
completely removes the need for the `REVERSE_DEPS` setting, which has now
been deprecated. (#18905)

3.1.36 - 04/16/23
-----------------
Expand Down
32 changes: 15 additions & 17 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
from tools.minimal_runtime_shell import generate_minimal_runtime_html
import tools.line_endings
from tools import feature_matrix
from tools import deps_info
from tools import js_manipulation
from tools import wasm2c
from tools import webassembly
Expand Down Expand Up @@ -1275,7 +1274,7 @@ def run(args):
process_libraries(state, [])
if len(input_files) != 1:
exit_with_error('--post-link requires a single input file')
phase_post_link(options, state, input_files[0][1], wasm_target, target)
phase_post_link(options, state, input_files[0][1], wasm_target, target, {})
return 0

## Compile source code to object files
Expand Down Expand Up @@ -1316,11 +1315,10 @@ def run(args):
js_info = get_js_sym_info()
if not settings.SIDE_MODULE:
js_syms = js_info['deps']
deps_info.append_deps_info(js_syms)
if settings.ASYNCIFY:
settings.ASYNCIFY_IMPORTS += ['env.' + x for x in js_info['asyncFuncs']]

phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs)
phase_calculate_system_libraries(state, linker_arguments, newargs)

phase_link(linker_arguments, wasm_target, js_syms)

Expand All @@ -1336,7 +1334,7 @@ def run(args):

# Perform post-link steps (unless we are running bare mode)
if options.oformat != OFormat.BARE:
phase_post_link(options, state, wasm_target, wasm_target, target)
phase_post_link(options, state, wasm_target, wasm_target, target, js_syms)

return 0

Expand Down Expand Up @@ -1952,6 +1950,12 @@ def phase_linker_setup(options, state, newargs):

if '_main' in settings.EXPORTED_FUNCTIONS:
settings.EXPORT_IF_DEFINED.append('__main_argc_argv')
elif settings.ASSERTIONS:
# In debug builds when `main` is not explicitly requested as an
# export we still add it to EXPORT_IF_DEFINED so that we can warn
# users who forget to explicitly export `main`.
# See other.test_warn_unexported_main.
settings.EXPORT_IF_DEFINED.append('main')

if settings.ASSERTIONS:
# Exceptions are thrown with a stack trace by default when ASSERTIONS is
Expand Down Expand Up @@ -2082,11 +2086,6 @@ def phase_linker_setup(options, state, newargs):
settings.INCLUDE_FULL_LIBRARY = 1
settings.DEFAULT_LIBRARY_FUNCS_TO_INCLUDE += ['$loadDylibs']

# If we are including the entire JS library then we know for sure we will, by definition,
# require all the reverse dependencies.
if settings.INCLUDE_FULL_LIBRARY:
default_setting('REVERSE_DEPS', 'all')

if settings.MAIN_MODULE == 1 or settings.SIDE_MODULE == 1:
settings.LINKABLE = 1

Expand Down Expand Up @@ -3086,14 +3085,13 @@ def compile_source_file(i, input_file):


@ToolchainProfiler.profile_block('calculate system libraries')
def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs):
def phase_calculate_system_libraries(state, linker_arguments, newargs):
extra_files_to_link = []
# Link in ports and system libraries, if necessary
if not settings.SIDE_MODULE:
# Ports are always linked into the main module, never the side module.
extra_files_to_link += ports.get_libs(settings)
all_linker_inputs = [f for _, f in sorted(linker_inputs)] + extra_files_to_link
extra_files_to_link += system_libs.calculate(all_linker_inputs, newargs, forced=state.forced_stdlibs)
extra_files_to_link += system_libs.calculate(newargs, forced=state.forced_stdlibs)
linker_arguments.extend(extra_files_to_link)


Expand All @@ -3112,7 +3110,7 @@ def phase_link(linker_arguments, wasm_target, js_syms):


@ToolchainProfiler.profile_block('post_link')
def phase_post_link(options, state, in_wasm, wasm_target, target):
def phase_post_link(options, state, in_wasm, wasm_target, target, js_syms):
global final_js

target_basename = unsuffixed_basename(target)
Expand All @@ -3134,7 +3132,7 @@ def phase_post_link(options, state, in_wasm, wasm_target, target):
else:
memfile = shared.replace_or_append_suffix(target, '.mem')

phase_emscript(options, in_wasm, wasm_target, memfile)
phase_emscript(options, in_wasm, wasm_target, memfile, js_syms)

if options.js_transform:
phase_source_transforms(options)
Expand All @@ -3152,7 +3150,7 @@ def phase_post_link(options, state, in_wasm, wasm_target, target):


@ToolchainProfiler.profile_block('emscript')
def phase_emscript(options, in_wasm, wasm_target, memfile):
def phase_emscript(options, in_wasm, wasm_target, memfile, js_syms):
# Emscripten
logger.debug('emscript')

Expand All @@ -3161,7 +3159,7 @@ def phase_emscript(options, in_wasm, wasm_target, memfile):
# _read in shell.js depends on intArrayToString when SUPPORT_BASE64_EMBEDDING is set
settings.DEFAULT_LIBRARY_FUNCS_TO_INCLUDE.append('$intArrayToString')

emscripten.run(in_wasm, wasm_target, final_js, memfile)
emscripten.run(in_wasm, wasm_target, final_js, memfile, js_syms)
save_intermediate('original')


Expand Down
24 changes: 17 additions & 7 deletions emscripten.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def create_named_globals(metadata):
return '\n'.join(named_globals)


def emscript(in_wasm, out_wasm, outfile_js, memfile):
def emscript(in_wasm, out_wasm, outfile_js, memfile, js_syms):
# Overview:
# * Run wasm-emscripten-finalize to extract metadata and modify the binary
# to use emscripten's wasm<->JS ABI
Expand All @@ -309,7 +309,7 @@ def emscript(in_wasm, out_wasm, outfile_js, memfile):
# set file locations, so that JS glue can find what it needs
settings.WASM_BINARY_FILE = js_manipulation.escape_for_js_string(os.path.basename(out_wasm))

metadata = finalize_wasm(in_wasm, out_wasm, memfile)
metadata = finalize_wasm(in_wasm, out_wasm, memfile, js_syms)

if settings.RELOCATABLE and settings.MEMORY64 == 2:
metadata.imports += ['__memory_base32']
Expand Down Expand Up @@ -461,7 +461,7 @@ def get_metadata(infile, outfile, modify_wasm, args):
return metadata


def finalize_wasm(infile, outfile, memfile):
def finalize_wasm(infile, outfile, memfile, js_syms):
building.save_intermediate(infile, 'base.wasm')
args = []

Expand Down Expand Up @@ -536,13 +536,23 @@ def finalize_wasm(infile, outfile, memfile):

expected_exports = set(settings.EXPORTED_FUNCTIONS)
expected_exports.update(asmjs_mangle(s) for s in settings.REQUIRED_EXPORTS)

# Calculate the subset of exports that were explicitly marked with llvm.used.
# Assume that when JS symbol dependencies are exported it is because they
# are needed by a JS symbol and are not being explicitly exported due
# to EMSCRIPTEN_KEEPALIVE (llvm.used).
for deps in js_syms.values():
expected_exports.update(asmjs_mangle(s) for s in deps)

# Calculate the subset of exports that were explicitly marked as
# EMSCRIPTEN_KEEPALIVE (llvm.used).
# These are any exports that were not requested on the command line and are
# not known auto-generated system functions.
unexpected_exports = [e for e in metadata.exports if treat_as_user_function(e)]
unexpected_exports = [asmjs_mangle(e) for e in unexpected_exports]
unexpected_exports = [e for e in unexpected_exports if e not in expected_exports]
if '_main' in unexpected_exports:
logger.warning('main() is in the input files, but "_main" is not in EXPORTED_FUNCTIONS, which means it may be eliminated as dead code. Export it if you want main() to run.')
unexpected_exports.remove('_main')

building.user_requested_exports.update(unexpected_exports)
settings.EXPORTED_FUNCTIONS.extend(unexpected_exports)

Expand Down Expand Up @@ -922,5 +932,5 @@ def normalize_line_endings(text):
return text


def run(in_wasm, out_wasm, outfile_js, memfile):
emscript(in_wasm, out_wasm, outfile_js, memfile)
def run(in_wasm, out_wasm, outfile_js, memfile, js_syms):
emscript(in_wasm, out_wasm, outfile_js, memfile, js_syms)
Original file line number Diff line number Diff line change
Expand Up @@ -596,9 +596,6 @@ See the `library_*.js`_ files for other examples.
This is useful when all the implemented methods use a JavaScript
singleton containing helper methods. See ``library_webgl.js`` for
an example.
- If a JavaScript library depends on a compiled C library (like most
of *libc*), you must edit `src/deps_info.json`_. Search for
"deps_info" in `tools/system_libs.py`_.
- The keys passed into `mergeInto` generate functions that are prefixed
by ``_``. In other words ``my_func: function() {},`` becomes
``function _my_func() {}``, as all C methods in emscripten have a ``_`` prefix. Keys starting with ``$`` have the ``$``
Expand Down Expand Up @@ -810,7 +807,6 @@ you can give it a try. See `Emnapi documentation`_ for more details.

.. _library.js: https://github.com/emscripten-core/emscripten/blob/main/src/library.js
.. _test_js_libraries: https://github.com/emscripten-core/emscripten/blob/1.29.12/tests/test_core.py#L5043
.. _src/deps_info.json: https://github.com/emscripten-core/emscripten/blob/main/src/deps_info.json
.. _tools/system_libs.py: https://github.com/emscripten-core/emscripten/blob/main/tools/system_libs.py
.. _library_\*.js: https://github.com/emscripten-core/emscripten/tree/main/src
.. _test_add_function in test/test_core.py: https://github.com/emscripten-core/emscripten/blob/1.29.12/tests/test_core.py#L6237
Expand Down
30 changes: 28 additions & 2 deletions src/jsifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,26 @@ function isDefined(symName) {
return false;
}

/**
 * Collect every string-valued dependency reachable from `symbol` by
 * following `<name>__deps` entries in `LibraryManager.library`.
 *
 * Non-string entries in a `__deps` list are ignored. The starting symbol
 * itself is only part of the result if something reachable lists it as a
 * dependency. The result is ordered by first discovery during the walk.
 *
 * @param {string} symbol Library symbol whose dependencies are wanted.
 * @return {!Array<string>} De-duplicated list of transitive dependencies.
 */
function getTransitiveDeps(symbol) {
  // TODO(sbc): Use some kind of cache to avoid quadratic behaviour here.
  const collected = new Set();
  const visited = new Set();
  const pending = [symbol];
  while (pending.length > 0) {
    const current = pending.pop();
    // Each symbol is expanded at most once, so dependency cycles terminate.
    if (visited.has(current)) {
      continue;
    }
    visited.add(current);
    const declaredDeps = LibraryManager.library[current + '__deps'] || [];
    const namedDeps = declaredDeps.filter((dep) => typeof dep === 'string');
    for (const dep of namedDeps) {
      collected.add(dep);
      pending.push(dep);
    }
  }
  return [...collected];
}

function runJSify() {
const libraryItems = [];
const symbolDeps = {};
Expand Down Expand Up @@ -260,8 +280,14 @@ function ${name}(${args}) {

if (symbolsOnly) {
if (!isJsOnlySymbol(symbol) && LibraryManager.library.hasOwnProperty(symbol)) {
externalDeps = deps.filter((d) => !isJsOnlySymbol(d) && !(d in LibraryManager.library) && typeof d === 'string');
symbolDeps[symbol] = externalDeps;
var value = LibraryManager.library[symbol];
var resolvedSymbol = symbol;
// Resolve aliases before looking up deps
if (typeof value == 'string' && value[0] != '=' && LibraryManager.library.hasOwnProperty(value)) {
resolvedSymbol = value;
}
var transtiveDeps = getTransitiveDeps(resolvedSymbol);
symbolDeps[symbol] = transtiveDeps.filter((d) => !isJsOnlySymbol(d) && !(d in LibraryManager.library));
}
return;
}
Expand Down
11 changes: 9 additions & 2 deletions src/library.js
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,13 @@ mergeInto(LibraryManager.library, {
// ==========================================================================

#if SUPPORT_LONGJMP == 'emscripten'
// In WebAssemblyLowerEmscriptenEHSjLj pass in the LLVM backend, function
// calls that exist in the same function with setjmp are converted to a code
// sequence that includes invokes, malloc, free, saveSetjmp, and
// emscripten_longjmp. setThrew is called from invokes, but we don't have
// any way to express that dependency so we use emscripten_throw_longjmp as
// a proxy and declare the dependency here.
_emscripten_throw_longjmp__deps: ['setThrew'],
_emscripten_throw_longjmp: function() {
#if EXCEPTION_STACK_TRACES
throw new EmscriptenSjLj;
Expand Down Expand Up @@ -1721,7 +1728,7 @@ mergeInto(LibraryManager.library, {
return { family: family, addr: addr, port: port };
},
$writeSockaddr__docs: '/** @param {number=} addrlen */',
$writeSockaddr__deps: ['$Sockets', '$inetPton4', '$inetPton6', '$zeroMemory'],
$writeSockaddr__deps: ['$Sockets', '$inetPton4', '$inetPton6', '$zeroMemory', 'htons'],
$writeSockaddr: function (sa, family, addr, port, addrlen) {
switch (family) {
case {{{ cDefs.AF_INET }}}:
Expand Down Expand Up @@ -1858,7 +1865,7 @@ mergeInto(LibraryManager.library, {
return 0;
},

getaddrinfo__deps: ['$Sockets', '$DNS', '$inetPton4', '$inetNtop4', '$inetPton6', '$inetNtop6', '$writeSockaddr'],
getaddrinfo__deps: ['$Sockets', '$DNS', '$inetPton4', '$inetNtop4', '$inetPton6', '$inetNtop6', '$writeSockaddr', 'malloc', 'htonl'],
getaddrinfo__proxy: 'sync',
getaddrinfo: function(node, service, hint, out) {
// Note getaddrinfo currently only returns a single addrinfo with ai_next defaulting to NULL. When NULL
Expand Down
6 changes: 5 additions & 1 deletion src/library_async.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ mergeInto(LibraryManager.library, {
#if ASYNCIFY
$Asyncify__deps: ['$runAndAbortIfError', '$callUserCallback', '$sigToWasmTypes',
#if !MINIMAL_RUNTIME
'$runtimeKeepalivePush', '$runtimeKeepalivePop'
'$runtimeKeepalivePush', '$runtimeKeepalivePop',
#endif
#if ASYNCIFY == 1
// Needed by allocateData and handleSleep respectively
'malloc', 'free',
#endif
],

Expand Down
2 changes: 2 additions & 0 deletions src/library_browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,7 @@ var LibraryBrowser = {

// To avoid creating worker parent->child chains, always proxies to execute on the main thread.
emscripten_create_worker__proxy: 'sync',
emscripten_create_worker__deps: ['$UTF8ToString', 'malloc', 'free'],
emscripten_create_worker: function(url) {
url = UTF8ToString(url);
var id = Browser.workers.length;
Expand Down Expand Up @@ -1253,6 +1254,7 @@ var LibraryBrowser = {
return id;
},

emscripten_destroy_worker__deps: ['free'],
emscripten_destroy_worker__proxy: 'sync',
emscripten_destroy_worker: function(id) {
var info = Browser.workers[id];
Expand Down
12 changes: 11 additions & 1 deletion src/library_exceptions.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ var LibraryExceptions = {
//
// excPtr - Thrown object pointer to wrap. Metadata pointer is calculated from it.
$ExceptionInfo__docs: '/** @constructor */',
$ExceptionInfo__deps: [
'__cxa_is_pointer_type',
#if EXCEPTION_DEBUG
$ExceptionInfo__deps: ['$ptrToString'],
'$ptrToString'
#endif
],
$ExceptionInfo: function(excPtr) {
this.excPtr = excPtr;
this.ptr = excPtr - {{{ C_STRUCTS.__cxa_exception.__size__ }}};
Expand Down Expand Up @@ -416,6 +419,13 @@ var LibraryExceptions = {
addCxaCatch = function(n) {
LibraryManager.library['__cxa_find_matching_catch_' + n] = '__cxa_find_matching_catch';
};

// Add the first 10 catch handlers preemptively. Others get added on demand in
// jsifier. This is done here primarily so that these symbols end up with the
// correct deps in the stub library that we pass to wasm-ld.
for (let i = 1; i < 10; i++) {
addCxaCatch(i)
}
#endif

mergeInto(LibraryManager.library, LibraryExceptions);
2 changes: 1 addition & 1 deletion src/library_exceptions_stub.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var LibraryExceptions = {};
#if !INCLUDE_FULL_LIBRARY
// This method of link-time error generation is not compatible with INCLUDE_FULL_LIBRARY
LibraryExceptions[name + '__deps'] = [function() {
error('DISABLE_EXCEPTION_THROWING was set (likely due to -fno-exceptions), which means no C++ exception throwing support code is linked in, but such support is required by symbol ' + name + '. Either do not set DISABLE_EXCEPTION_THROWING (if you do want exception throwing) or compile all source files with -fno-except (so that no exceptions support code is required); also make sure DISABLE_EXCEPTION_CATCHING is set to the right value - if you want exceptions, it should be off, and vice versa.');
error(`DISABLE_EXCEPTION_THROWING was set (likely due to -fno-exceptions), which means no C++ exception throwing support code is linked in, but such support is required by symbol '${name}'. Either do not set DISABLE_EXCEPTION_THROWING (if you do want exception throwing) or compile all source files with -fno-except (so that no exceptions support code is required); also make sure DISABLE_EXCEPTION_CATCHING is set to the right value - if you want exceptions, it should be off, and vice versa.`);
}];
#endif
});
Expand Down
2 changes: 1 addition & 1 deletion src/library_glew.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/

var LibraryGLEW = {
$GLEW__deps: ['glGetString', '$stringToNewUTF8'],
$GLEW__deps: ['glGetString', '$stringToNewUTF8', '$UTF8ToString'],
$GLEW: {
isLinaroFork: 1,
extensions: null,
Expand Down
2 changes: 1 addition & 1 deletion src/library_glfw.js
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,7 @@ var LibraryGLFW = {
/*******************************************************************************
* GLFW FUNCTIONS
******************************************************************************/
glfwInit__deps: ['emscripten_get_device_pixel_ratio'],
glfwInit__deps: ['emscripten_get_device_pixel_ratio', 'malloc', 'free'],
glfwInit__sig: 'i',
glfwInit: function() {
if (GLFW.windows) return 1; // GL_TRUE
Expand Down
Loading

0 comments on commit 1e7b78f

Please sign in to comment.