Skip to content

Commit

Permalink
fix(builtin): make linker aspect run in constant time per target
Browse files Browse the repository at this point in the history
Previously, for every transitive dependency of a node binary target, the
module_mappings_aspect had a runtime quadratic in the number of
mappings: It traverses the mappings of all direct dependencies in a loop
and called _link_mapping in its body, which again traverses the
mappings.

With this commit, the LinkerPackageMappingInfo provider is restructured
to hold depsets of mapping entries and node_modules root paths. In this
way the aspect invocations for the individual targets never traverse the
(partially constructed) mappings.
Instead, write_node_modules_manifest flattens the depset only after the
aspect has run. If this should ever become a performance bottleneck
again, the flattening could be moved into the execution phase by
letting a dedicated binary consume the depset wrappen in an Args object.

In an experiment with a single medium-sized package.json and a Node
binary depending on all targets in the corresponding npm repository,
this commit reduces the analysis wall time spent in the aspect
implementation from ~30s to ~70ms.
  • Loading branch information
fmeum authored and gregmagolan committed Feb 2, 2022
1 parent 6d712cd commit 522fd7c
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 114 deletions.
217 changes: 119 additions & 98 deletions internal/linker/link_node_modules.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,17 @@ def _debug(vars, *args):
LinkerPackageMappingInfo = provider(
doc = """Provider capturing package mappings for the linker to consume.""",
fields = {
"mappings": "Dictionary of mappings. Maps package names to an exec path.",
"mappings": """Depset of structs with mapping info.
Each struct has the following fields:
package_name: The name of the package.
package_path: The root path of the node_modules under which this package should be linked.
link_path: The exec path under which the package is available.
Note: The depset may contain multiple entries per (package_name, package_path) pair.
Consumers should handle these duplicated appropriately. The depset uses topological order to ensure
that a target's mappings come before possibly conflicting mappings of its dependencies.""",
"node_modules_roots": "Depset of node_module roots.",
},
)

Expand All @@ -40,39 +50,54 @@ def add_arg(args, arg):
else:
args.add(arg)

def _link_mapping(label, mappings, k, v):
# Check that two package name mapping do not map to two different source paths
k_segs = k.split(":")
package_name = k_segs[0]
package_path = k_segs[1] if len(k_segs) > 1 else ""
link_path = v

for iter_key, iter_values in mappings.items():
# Map key is of format "package_name:package_path"
# Map values are of format [deprecated, link_path]
iter_segs = iter_key.split(":")
iter_package_name = iter_segs[0]
iter_package_path = iter_segs[1] if len(iter_segs) > 1 else ""
iter_source_path = iter_values
if package_name == iter_package_name and package_path == iter_package_path:
# If we're linking to the output tree be tolerant of linking to different
# output trees since we can have "static" links that come from cfg="exec" binaries.
# In the future when we static link directly into runfiles without the linker
# we can remove this logic.
link_path_segments = link_path.split("/")
iter_source_path_segments = iter_source_path.split("/")
bin_links = len(link_path_segments) >= 3 and len(iter_source_path_segments) >= 3 and link_path_segments[0] == "bazel-out" and iter_source_path_segments[0] == "bazel-out" and link_path_segments[2] == "bin" and iter_source_path_segments[2] == "bin"
if bin_links:
compare_link_path = "/".join(link_path_segments[3:]) if len(link_path_segments) > 3 else ""
compare_iter_source_path = "/".join(iter_source_path_segments[3:]) if len(iter_source_path_segments) > 3 else ""
else:
compare_link_path = link_path
compare_iter_source_path = iter_source_path
if compare_link_path != compare_iter_source_path:
msg = "conflicting mapping at '%s': '%s' and '%s' map to conflicting %s and %s" % (label, k, iter_key, compare_link_path, compare_iter_source_path)
fail(msg)

return True
def _detect_conflicts(module_sets, mapping):
"""Verifies that the new mapping does not conflict with existing mappings in module_sets."""
if mapping.package_path not in module_sets:
return
existing_link_path = module_sets[mapping.package_path].get(mapping.package_name)
if existing_link_path == None:
return

# If we're linking to the output tree be tolerant of linking to different
# output trees since we can have "static" links that come from cfg="exec" binaries.
# In the future when we static link directly into runfiles without the linker
# we can remove this logic.
link_path_segments = mapping.link_path.split("/")
existing_link_path_segments = existing_link_path.split("/")
bin_links = len(link_path_segments) >= 3 and len(existing_link_path_segments) >= 3 and link_path_segments[0] == "bazel-out" and existing_link_path_segments[0] == "bazel-out" and link_path_segments[2] == "bin" and existing_link_path_segments[2] == "bin"
if bin_links:
compare_link_path = "/".join(link_path_segments[3:]) if len(link_path_segments) > 3 else ""
compare_existing_link_path = "/".join(existing_link_path_segments[3:]) if len(existing_link_path_segments) > 3 else ""
else:
compare_link_path = mapping.link_path
compare_existing_link_path = existing_link_path
if compare_link_path != compare_existing_link_path:
msg = "conflicting mapping: '%s' (package path: %s) maps to conflicting paths '%s' and '%s'" % (mapping.package_name, mapping.package_path, compare_link_path, compare_existing_link_path)
fail(msg)

def _flatten_to_module_set(mappings_depset):
"""Convert a depset of mapping to a module sets (modules per package package_path).
The returned dictionary has the following structure:
{
"package_path": {
"package_name": "link_path",
...
},
...
}
"""

# FIXME: Flattens a depset during the analysis phase. Ideally, this would be done during the
# execution phase using an Args object.
flattens_mappings = mappings_depset.to_list()
module_sets = {}
for mapping in flattens_mappings:
_detect_conflicts(module_sets, mapping)
if mapping.package_path not in module_sets:
module_sets[mapping.package_path] = {}
module_sets[mapping.package_path][mapping.package_name] = mapping.link_path
return module_sets

def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_workspace_root = False):
"""Writes a manifest file read by the linker, containing info about resolving runtime dependencies
Expand All @@ -85,7 +110,6 @@ def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_work
If source files need to be required then they can be copied to the bin_dir with copy_to_bin.
"""

mappings = {ctx.workspace_name: ctx.bin_dir.path} if link_workspace_root else {}
node_modules_roots = {}

# Look through data/deps attributes to find the root directories for the third-party node_modules;
Expand All @@ -100,38 +124,33 @@ def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_work
fail("All npm dependencies at the path '%s' must come from a single workspace. Found '%s' and '%s'." % (path, other_workspace, workspace))
node_modules_roots[path] = workspace

# Look through data/deps attributes to find first party deps to link
direct_mappings = []
direct_node_modules_roots = []
if link_workspace_root:
direct_mappings.append(struct(
package_name = ctx.workspace_name,
package_path = "",
link_path = ctx.bin_dir.path,
))
direct_node_modules_roots.append("")

transitive_mappings = []
transitive_node_modules_roots = []
for dep in extra_data + getattr(ctx.attr, "data", []) + getattr(ctx.attr, "deps", []):
if not LinkerPackageMappingInfo in dep:
continue
transitive_mappings.append(dep[LinkerPackageMappingInfo].mappings)
transitive_node_modules_roots.append(dep[LinkerPackageMappingInfo].node_modules_roots)

for k, v in dep[LinkerPackageMappingInfo].mappings.items():
map_key_split = k.split(":")
package_name = map_key_split[0]
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
if package_path not in node_modules_roots:
node_modules_roots[package_path] = ""
if _link_mapping(dep.label, mappings, k, v):
_debug(ctx.var, "Linking %s: %s" % (k, v))
mappings[k] = v

# Convert mappings to a module sets (modules per package package_path)
# {
# "package_path": {
# "package_name": "link_path",
# ...
# },
# ...
# }
module_sets = {}
for k, v in mappings.items():
map_key_split = k.split(":")
package_name = map_key_split[0]
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
link_path = v
if package_path not in module_sets:
module_sets[package_path] = {}
module_sets[package_path][package_name] = link_path
mappings = depset(direct_mappings, transitive = transitive_mappings, order = "topological")
module_sets = _flatten_to_module_set(mappings)

# FIXME: Flattens a depset during the analysis phase. Ideally, this would be done during the
# execution phase using an Args object.
linker_node_modules_roots = depset(direct_node_modules_roots, transitive = transitive_node_modules_roots).to_list()
for node_modules_root in linker_node_modules_roots:
if node_modules_root not in node_modules_roots:
node_modules_roots[node_modules_root] = ""

# Write the result to a file, and use the magic node option --bazel_node_modules_manifest
# The launcher.sh will peel off this argument and pass it to the linker rather than the program.
Expand All @@ -148,58 +167,60 @@ def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_work
ctx.actions.write(modules_manifest, str(content))
return modules_manifest

def _get_module_mappings(target, ctx):
"""Gathers module mappings from LinkablePackageInfo which maps "package_name:package_path" to link_path.
def _get_linker_package_mapping_info(target, ctx):
"""Transitively gathers module mappings and node_modules roots from LinkablePackageInfo.
Args:
target: target
ctx: ctx
Returns:
Returns module mappings of shape:
{ "package_name:package_path": link_path, ... }
A LinkerPackageMappingInfo provider that contains the mappings and roots for the current
target and all its transitive dependencies.
"""
mappings = {}

# Propagate transitive mappings
transitive_mappings = []
transitive_node_modules_roots = []
for name in _MODULE_MAPPINGS_DEPS_NAMES:
for dep in getattr(ctx.rule.attr, name, []):
if not LinkerPackageMappingInfo in dep:
continue
transitive_mappings.append(dep[LinkerPackageMappingInfo].mappings)
transitive_node_modules_roots.append(dep[LinkerPackageMappingInfo].node_modules_roots)

for k, v in dep[LinkerPackageMappingInfo].mappings.items():
if _link_mapping(target.label, mappings, k, v):
_debug(ctx.var, "target %s propagating module mapping %s: %s" % (dep.label, k, v))
mappings[k] = v
direct_mappings = []
direct_node_modules_roots = []

# Look for LinkablePackageInfo mapping in this node
# LinkablePackageInfo may be provided without a package_name so check for that case as well
if not LinkablePackageInfo in target:
# No mappings contributed here, short-circuit with the transitive ones we collected
_debug(ctx.var, "No LinkablePackageInfo for", target.label)
return mappings

linkable_package_info = target[LinkablePackageInfo]

# LinkablePackageInfo may be provided without a package_name so check for that case as well
if not linkable_package_info.package_name:
# No mappings contributed here, short-circuit with the transitive ones we collected
elif not target[LinkablePackageInfo].package_name:
_debug(ctx.var, "No package_name in LinkablePackageInfo for", target.label)
return mappings

package_path = linkable_package_info.package_path if hasattr(linkable_package_info, "package_path") else ""
map_key = "%s:%s" % (linkable_package_info.package_name, package_path)
map_value = linkable_package_info.path

if _link_mapping(target.label, mappings, map_key, map_value):
_debug(ctx.var, "target %s adding module mapping %s: %s" % (target.label, map_key, map_value))
mappings[map_key] = map_value

# Returns mappings of shape:
# {
# "package_name:package_path": link_path,
# ...
# }
return mappings
else:
linkable_package_info = target[LinkablePackageInfo]
package_path = linkable_package_info.package_path if hasattr(linkable_package_info, "package_path") else ""
direct_mappings.append(
struct(
package_name = linkable_package_info.package_name,
package_path = package_path,
link_path = linkable_package_info.path,
),
)
_debug(ctx.var, "target %s (package path: %s) adding module mapping %s: %s" % (
target.label,
package_path,
linkable_package_info.package_name,
linkable_package_info.path,
))
direct_node_modules_roots.append(package_path)

mappings = depset(direct_mappings, transitive = transitive_mappings, order = "topological")
node_modules_roots = depset(direct_node_modules_roots, transitive = transitive_node_modules_roots)
return LinkerPackageMappingInfo(
mappings = mappings,
node_modules_roots = node_modules_roots,
)

def _module_mappings_aspect_impl(target, ctx):
# If the target explicitly provides mapping information, we will not propagate
Expand All @@ -209,7 +230,7 @@ def _module_mappings_aspect_impl(target, ctx):
return []

return [
LinkerPackageMappingInfo(mappings = _get_module_mappings(target, ctx)),
_get_linker_package_mapping_info(target, ctx),
]

module_mappings_aspect = aspect(
Expand Down
10 changes: 4 additions & 6 deletions internal/node/node.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,10 @@ def _compute_node_modules_roots(ctx, data):
# Add in roots for multi-linked npm deps
for dep in data:
if LinkerPackageMappingInfo in dep:
for k, v in dep[LinkerPackageMappingInfo].mappings.items():
map_key_split = k.split(":")
package_name = map_key_split[0]
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
if package_path not in node_modules_roots:
node_modules_roots[package_path] = ""
linker_node_modules_roots = dep[LinkerPackageMappingInfo].node_modules_roots.to_list()
for node_modules_root in linker_node_modules_roots:
if node_modules_root not in node_modules_roots:
node_modules_roots[node_modules_root] = ""

return node_modules_roots

Expand Down
10 changes: 4 additions & 6 deletions internal/providers/node_runtime_deps_info.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,10 @@ def _compute_node_modules_roots(ctx):
# Add in roots for multi-linked npm deps
for dep in deps:
if LinkerPackageMappingInfo in dep:
for k, v in dep[LinkerPackageMappingInfo].mappings.items():
map_key_split = k.split(":")
package_name = map_key_split[0]
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
if package_path not in node_modules_roots:
node_modules_roots[package_path] = ""
linker_node_modules_roots = dep[LinkerPackageMappingInfo].node_modules_roots.to_list()
for node_modules_root in linker_node_modules_roots:
if node_modules_root not in node_modules_roots:
node_modules_roots[node_modules_root] = ""

return node_modules_roots

Expand Down
11 changes: 7 additions & 4 deletions packages/esbuild/esbuild.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ def _esbuild_impl(ctx):

# Collect the path alias mapping to resolve packages correctly
if LinkerPackageMappingInfo in dep:
for key, value in dep[LinkerPackageMappingInfo].mappings.items():
# key is of format "package_name:package_path"
package_name = key.split(":")[0]
path_alias_mappings.update(generate_path_mapping(package_name, value.replace(ctx.bin_dir.path + "/", "")))
for mapping in dep[LinkerPackageMappingInfo].mappings.to_list():
path_alias_mappings.update(
generate_path_mapping(
mapping.package_name,
mapping.link_path.replace(ctx.bin_dir.path + "/", ""),
),
)

entry_points = desugar_entry_point_names(ctx.file.entry_point, ctx.files.entry_points)

Expand Down

0 comments on commit 522fd7c

Please sign in to comment.