From f399be32e8d5d16cb4f41ef83e2b2ed1db7d5b29 Mon Sep 17 00:00:00 2001 From: Greg Magolan Date: Wed, 8 Apr 2020 13:03:49 -0700 Subject: [PATCH] fix(builtin): always symlink node_modules at `execroot/my_wksp/node_modules` even when running in runfiles Under runfiles, the linker should symlink node_modules at `execroot/my_wksp` so that when there are no runfiles (default on Windows) and scripts run out of `execroot/my_wksp` they can resolve node_modules with standard node_module resolution Also, restore BAZEL_WORKSPACE name environment variable. The optimization of deriving the workspace name from the path does not work on RBE. While we can derive the workspace from the pwd when running locally because it is in the execroot path `execroot/my_wksp`, on RBE the `execroot/my_wksp` path is reduced a path such as `/w/f/b` so the workspace name is obfuscated from the path. So we provide the workspace name here as an environment variable avaiable to all actions for the runfiles helpers to use. --- internal/linker/index.js | 126 +++++++++------- internal/linker/link_node_modules.ts | 140 +++++++++++------- .../linker/test/link_node_modules.spec.ts | 17 ++- internal/node/node.bzl | 11 ++ .../bazel/tools/bash/runfiles/runfiles.bash | 9 +- 5 files changed, 184 insertions(+), 119 deletions(-) diff --git a/internal/linker/index.js b/internal/linker/index.js index 6af11a4b43..1b30605637 100644 --- a/internal/linker/index.js +++ b/internal/linker/index.js @@ -106,41 +106,58 @@ Include as much of the build output as you can without disclosing anything confi }); } /** - * Resolve a root directory string to the actual location on disk - * where node_modules was installed - * @param root a string like 'npm/node_modules' + * Resolve to an absolute root node_modules directory. + * @param root The bazel managed node_modules root such as 'npm/node_modules', which includes the + * workspace name as the first segment. May be undefined if there are no third_party node_modules + * deps. + * @param startCwd The absolute path that bazel started the action at. + * @param isExecroot True if the action is run in the execroot, false if the action is run in + * runfiles root. + * @param runfiles The runfiles helper object. + * @return The absolute path on disk where node_modules was installed or if no third party + * node_modules are deps of the current target the returns the absolute path to + * `execroot/my_wksp/node_modules`. */ - function resolveRoot(root, runfiles) { + function resolveRoot(root, startCwd, isExecroot, runfiles) { return __awaiter(this, void 0, void 0, function* () { - if (!runfiles.execroot) { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - process.chdir('..'); - } - // create a node_modules directory if no root - // this will be the case if only first-party modules are installed + if (isExecroot) { + // Under execroot, the root will be under an external folder from the startCwd + // `execroot/my_wksp`. For example, `execroot/my_wksp/external/npm/node_modules`. If there is no + // root, which will be the case if there are no third-party modules dependencies for this + // target, set the root to `execroot/my_wksp/node_modules`. + return root ? `${startCwd}/external/${root}` : `${startCwd}/node_modules`; + } + // Under runfiles, the linker should symlink node_modules at `execroot/my_wksp` + // so that when there are no runfiles (default on Windows) and scripts run out of + // `execroot/my_wksp` they can resolve node_modules with standard node_module resolution + // Look for bazel-out which is used to determine the the path to `execroot/my_wksp`. This works in + // all cases including on rbe where the execroot is a path such as `/b/f/w`. For example, when in + // runfiles on rbe, bazel runs the process in a directory such as + // `/b/f/w/bazel-out/k8-fastbuild/bin/path/to/pkg/some_test.sh.runfiles/my_wksp`. From here we can + // determine the execroot `b/f/w` by finding the first instance of bazel-out. + const match = startCwd.match(/\/bazel-out\//); + if (!match) { + panic(`No 'bazel-out' folder found in path '${startCwd}'!`); + return ''; + } + const symlinkRoot = startCwd.slice(0, match.index); + process.chdir(symlinkRoot); if (!root) { - if (!(yield exists('node_modules'))) { - log_verbose('no third-party packages; mkdir node_modules in ', process.cwd()); - yield fs.promises.mkdir('node_modules'); - } - return 'node_modules'; + // If there is no root, which will be the case if there are no third-party modules dependencies + // for this target, set the root to `execroot/my_wksp/node_modules`. + return `${symlinkRoot}/node_modules`; } // If we got a runfilesManifest map, look through it for a resolution // This will happen if we are running a binary that had some npm packages // "statically linked" into its runfiles const fromManifest = runfiles.lookupDirectory(root); - if (fromManifest) + if (fromManifest) { return fromManifest; - if (runfiles.execroot) { - // Under execroot there is an external folder in the root which look - // like 'my_wksp/external/npm/node_modules' - return path.join('external', root); } else { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - return root; + // Under runfiles, the root will be one folder up from the startCwd `runfiles/my_wksp`. + // This is true whether legacy external runfiles are on or off. + return path.resolve(`${startCwd}/../${root}`); } }); } @@ -172,9 +189,8 @@ Include as much of the build output as you can without disclosing anything confi If you want to test runfiles manifest behavior, add --spawn_strategy=standalone to the command line.`); } - // Bazel starts actions with pwd=execroot/my_wksp - this.workspaceDir = path.resolve('.'); - this.workspace = path.basename(this.workspaceDir); + // Bazel starts actions with pwd=execroot/my_wksp or pwd=runfiles/my_wksp + this.workspace = env['BAZEL_WORKSPACE'] || undefined; // If target is from an external workspace such as @npm//rollup/bin:rollup // resolvePackageRelative is not supported since package is in an external // workspace. @@ -183,9 +199,6 @@ Include as much of the build output as you can without disclosing anything confi // //path/to:target -> path/to this.package = target.split(':')[0].replace(/^\/\//, ''); } - // We can derive if the process is being run in the execroot - // if there is a bazel-out folder at the cwd. - this.execroot = existsSync('bazel-out'); } lookupDirectory(dir) { if (!this.manifest) @@ -237,10 +250,17 @@ Include as much of the build output as you can without disclosing anything confi throw new Error(`could not resolve modulePath ${modulePath}`); } resolveWorkspaceRelative(modulePath) { + if (!this.workspace) { + throw new Error('workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } return this.resolve(path.posix.join(this.workspace, modulePath)); } resolvePackageRelative(modulePath) { - if (!this.package) { + if (!this.workspace) { + throw new Error('workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } + // NB: this.package may be '' if at the root of the workspace + if (this.package === undefined) { throw new Error('package could not be determined from the environment; make sure BAZEL_TARGET is set'); } return this.resolve(path.posix.join(this.workspace, this.package, modulePath)); @@ -425,10 +445,24 @@ Include as much of the build output as you can without disclosing anything confi modules = modules || {}; log_verbose('manifest file', modulesManifest); log_verbose('manifest contents', JSON.stringify({ workspace, bin, root, modules }, null, 2)); - // NB: resolveRoot will change the cwd when under runfiles to the runfiles root - const rootDir = yield resolveRoot(root, runfiles); + // Bazel starts actions with pwd=execroot/my_wksp when under execroot or pwd=runfiles/my_wksp + // when under runfiles. + // Normalize the slashes in startCwd for easier matching and manipulation. + const startCwd = process.cwd().replace(/\\/g, '/'); + log_verbose('startCwd', startCwd); + // We can derive if the process is being run in the execroot if there is a bazel-out folder. + const isExecroot = existsSync(`${startCwd}/bazel-out`); + log_verbose('isExecroot', isExecroot.toString()); + // NB: resolveRoot will change the cwd when under runfiles to `execroot/my_wksp` + const rootDir = yield resolveRoot(root, startCwd, isExecroot, runfiles); log_verbose('resolved node_modules root', root, 'to', rootDir); log_verbose('cwd', process.cwd()); + // Create rootDir if it does not exists. This will be the case if there are no third-party deps + // for this target or if outside of the sandbox and there are no node_modules installed. + if (!(yield exists(rootDir))) { + log_verbose('no third-party packages; mkdir node_modules at ', root); + yield fs.promises.mkdir(rootDir); + } // Create the node_modules symlink to the node_modules root that node will resolve from yield symlink(rootDir, 'node_modules'); // Change directory to the node_modules root directory so that all subsequent @@ -440,30 +474,15 @@ Include as much of the build output as you can without disclosing anything confi yield mkdirp(path.dirname(m.name)); if (m.link) { const [root, modulePath] = m.link; - const externalPrefix = 'external/'; let target = ''; switch (root) { case 'execroot': - if (runfiles.execroot) { - target = path.posix.join(runfiles.workspaceDir, modulePath); - } - else { - // If under runfiles, convert from execroot path to runfiles path. - // First strip the bin portion if it exists: - let runfilesPath = modulePath; - if (runfilesPath.startsWith(`${bin}/`)) { - runfilesPath = runfilesPath.slice(bin.length + 1); - } - else if (runfilesPath === bin) { - runfilesPath = ''; - } - // Next replace `external/` with `../` if it exists: - if (runfilesPath.startsWith(externalPrefix)) { - runfilesPath = `../${runfilesPath.slice(externalPrefix.length)}`; - } - target = path.posix.join(runfiles.workspaceDir, runfilesPath); + if (isExecroot) { + target = `${startCwd}/${modulePath}`; + break; } - break; + // If under runfiles, the fall through to 'runfiles' case + // so that we handle case where there is only a MANIFEST file case 'runfiles': // Transform execroot path to the runfiles manifest path so that // it can be resolved with runfiles.resolve() @@ -471,6 +490,7 @@ Include as much of the build output as you can without disclosing anything confi if (runfilesPath.startsWith(`${bin}/`)) { runfilesPath = runfilesPath.slice(bin.length + 1); } + const externalPrefix = 'external/'; if (runfilesPath.startsWith(externalPrefix)) { runfilesPath = runfilesPath.slice(externalPrefix.length); } diff --git a/internal/linker/link_node_modules.ts b/internal/linker/link_node_modules.ts index 6391a0e4ea..7929b2561b 100644 --- a/internal/linker/link_node_modules.ts +++ b/internal/linker/link_node_modules.ts @@ -90,49 +90,71 @@ async function symlink(target: string, p: string): Promise { } /** - * Resolve a root directory string to the actual location on disk - * where node_modules was installed - * @param root a string like 'npm/node_modules' + * Resolve to an absolute root node_modules directory. + * @param root The bazel managed node_modules root such as 'npm/node_modules', which includes the + * workspace name as the first segment. May be undefined if there are no third_party node_modules + * deps. + * @param startCwd The absolute path that bazel started the action at. + * @param isExecroot True if the action is run in the execroot, false if the action is run in + * runfiles root. + * @param runfiles The runfiles helper object. + * @return The absolute path on disk where node_modules was installed or if no third party + * node_modules are deps of the current target the returns the absolute path to + * `execroot/my_wksp/node_modules`. */ -async function resolveRoot(root: string|undefined, runfiles: Runfiles) { - if (!runfiles.execroot) { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - process.chdir('..'); +async function resolveRoot( + root: string|undefined, startCwd: string, isExecroot: boolean, runfiles: Runfiles) { + if (isExecroot) { + // Under execroot, the root will be under an external folder from the startCwd + // `execroot/my_wksp`. For example, `execroot/my_wksp/external/npm/node_modules`. If there is no + // root, which will be the case if there are no third-party modules dependencies for this + // target, set the root to `execroot/my_wksp/node_modules`. + return root ? `${startCwd}/external/${root}` : `${startCwd}/node_modules`; } - // create a node_modules directory if no root - // this will be the case if only first-party modules are installed + + // Under runfiles, the linker should symlink node_modules at `execroot/my_wksp` + // so that when there are no runfiles (default on Windows) and scripts run out of + // `execroot/my_wksp` they can resolve node_modules with standard node_module resolution + + // Look for bazel-out which is used to determine the the path to `execroot/my_wksp`. This works in + // all cases including on rbe where the execroot is a path such as `/b/f/w`. For example, when in + // runfiles on rbe, bazel runs the process in a directory such as + // `/b/f/w/bazel-out/k8-fastbuild/bin/path/to/pkg/some_test.sh.runfiles/my_wksp`. From here we can + // determine the execroot `b/f/w` by finding the first instance of bazel-out. + const match = startCwd.match(/\/bazel-out\//); + if (!match) { + panic(`No 'bazel-out' folder found in path '${startCwd}'!`); + return ''; + } + const symlinkRoot = startCwd.slice(0, match.index); + process.chdir(symlinkRoot); + if (!root) { - if (!await exists('node_modules')) { - log_verbose('no third-party packages; mkdir node_modules in ', process.cwd()); - await fs.promises.mkdir('node_modules'); - } - return 'node_modules'; + // If there is no root, which will be the case if there are no third-party modules dependencies + // for this target, set the root to `execroot/my_wksp/node_modules`. + return `${symlinkRoot}/node_modules`; } // If we got a runfilesManifest map, look through it for a resolution // This will happen if we are running a binary that had some npm packages // "statically linked" into its runfiles const fromManifest = runfiles.lookupDirectory(root); - if (fromManifest) return fromManifest; - - if (runfiles.execroot) { - // Under execroot there is an external folder in the root which look - // like 'my_wksp/external/npm/node_modules' - return path.join('external', root); + if (fromManifest) { + return fromManifest; } else { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - return root; + // Under runfiles, the root will be one folder up from the startCwd `runfiles/my_wksp`. + // This is true whether legacy external runfiles are on or off. + return path.resolve(`${startCwd}/../${root}`) } } export class Runfiles { manifest: Map|undefined; dir: string|undefined; - execroot: boolean; - workspace: string; - workspaceDir: string; + /** + * If the environment gives us enough hints, we can know the workspace name + */ + workspace: string|undefined; /** * If the environment gives us enough hints, we can know the package path */ @@ -164,9 +186,8 @@ export class Runfiles { If you want to test runfiles manifest behavior, add --spawn_strategy=standalone to the command line.`); } - // Bazel starts actions with pwd=execroot/my_wksp - this.workspaceDir = path.resolve('.'); - this.workspace = path.basename(this.workspaceDir); + // Bazel starts actions with pwd=execroot/my_wksp or pwd=runfiles/my_wksp + this.workspace = env['BAZEL_WORKSPACE'] || undefined; // If target is from an external workspace such as @npm//rollup/bin:rollup // resolvePackageRelative is not supported since package is in an external // workspace. @@ -175,9 +196,6 @@ export class Runfiles { // //path/to:target -> path/to this.package = target.split(':')[0].replace(/^\/\//, ''); } - // We can derive if the process is being run in the execroot - // if there is a bazel-out folder at the cwd. - this.execroot = existsSync('bazel-out'); } lookupDirectory(dir: string): string|undefined { @@ -236,11 +254,20 @@ export class Runfiles { } resolveWorkspaceRelative(modulePath: string) { + if (!this.workspace) { + throw new Error( + 'workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } return this.resolve(path.posix.join(this.workspace, modulePath)); } resolvePackageRelative(modulePath: string) { - if (!this.package) { + if (!this.workspace) { + throw new Error( + 'workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } + // NB: this.package may be '' if at the root of the workspace + if (this.package === undefined) { throw new Error( 'package could not be determined from the environment; make sure BAZEL_TARGET is set'); } @@ -485,11 +512,28 @@ export async function main(args: string[], runfiles: Runfiles) { log_verbose('manifest file', modulesManifest); log_verbose('manifest contents', JSON.stringify({workspace, bin, root, modules}, null, 2)); - // NB: resolveRoot will change the cwd when under runfiles to the runfiles root - const rootDir = await resolveRoot(root, runfiles); + // Bazel starts actions with pwd=execroot/my_wksp when under execroot or pwd=runfiles/my_wksp + // when under runfiles. + // Normalize the slashes in startCwd for easier matching and manipulation. + const startCwd = process.cwd().replace(/\\/g, '/'); + log_verbose('startCwd', startCwd); + + // We can derive if the process is being run in the execroot if there is a bazel-out folder. + const isExecroot = existsSync(`${startCwd}/bazel-out`); + log_verbose('isExecroot', isExecroot.toString()); + + // NB: resolveRoot will change the cwd when under runfiles to `execroot/my_wksp` + const rootDir = await resolveRoot(root, startCwd, isExecroot, runfiles); log_verbose('resolved node_modules root', root, 'to', rootDir); log_verbose('cwd', process.cwd()); + // Create rootDir if it does not exists. This will be the case if there are no third-party deps + // for this target or if outside of the sandbox and there are no node_modules installed. + if (!(await exists(rootDir))) { + log_verbose('no third-party packages; mkdir node_modules at ', root); + await fs.promises.mkdir(rootDir); + } + // Create the node_modules symlink to the node_modules root that node will resolve from await symlink(rootDir, 'node_modules'); @@ -503,28 +547,15 @@ export async function main(args: string[], runfiles: Runfiles) { if (m.link) { const [root, modulePath] = m.link; - const externalPrefix = 'external/'; let target: string = ''; switch (root) { case 'execroot': - if (runfiles.execroot) { - target = path.posix.join(runfiles.workspaceDir, modulePath); - } else { - // If under runfiles, convert from execroot path to runfiles path. - // First strip the bin portion if it exists: - let runfilesPath = modulePath; - if (runfilesPath.startsWith(`${bin}/`)) { - runfilesPath = runfilesPath.slice(bin.length + 1); - } else if (runfilesPath === bin) { - runfilesPath = ''; - } - // Next replace `external/` with `../` if it exists: - if (runfilesPath.startsWith(externalPrefix)) { - runfilesPath = `../${runfilesPath.slice(externalPrefix.length)}`; - } - target = path.posix.join(runfiles.workspaceDir, runfilesPath); + if (isExecroot) { + target = `${startCwd}/${modulePath}`; + break; } - break; + // If under runfiles, the fall through to 'runfiles' case + // so that we handle case where there is only a MANIFEST file case 'runfiles': // Transform execroot path to the runfiles manifest path so that // it can be resolved with runfiles.resolve() @@ -532,6 +563,7 @@ export async function main(args: string[], runfiles: Runfiles) { if (runfilesPath.startsWith(`${bin}/`)) { runfilesPath = runfilesPath.slice(bin.length + 1); } + const externalPrefix = 'external/'; if (runfilesPath.startsWith(externalPrefix)) { runfilesPath = runfilesPath.slice(externalPrefix.length); } else { diff --git a/internal/linker/test/link_node_modules.spec.ts b/internal/linker/test/link_node_modules.spec.ts index d6a93909c8..0efa722385 100644 --- a/internal/linker/test/link_node_modules.spec.ts +++ b/internal/linker/test/link_node_modules.spec.ts @@ -24,13 +24,15 @@ function writeRunfiles(manifest: string[]) { describe('link_node_modules', () => { let workspace: string; + let runfilesWorkspace: string; beforeEach(() => { process.chdir(process.env['TEST_TMPDIR']!); // Prevent test isolation failures: each spec gets its own workspace workspace = `wksp_${Date.now()}`; + runfilesWorkspace = `${workspace}/${BIN_DIR}/runfiles/${workspace}`; // Create our local workspace where the build is running - mkdirp(workspace); + mkdirp(runfilesWorkspace); }); function readWorkspaceNodeModules(...parts: string[]) { @@ -517,8 +519,9 @@ describe('link_node_modules', () => { fs.writeFileSync(idx, 'exports = {}', 'utf-8'); const runfilesManifest = [`${idx} ${path.resolve(idx)}`]; - // Set the cwd() like Bazel would in the execroot - process.chdir(workspace); + // Set the cwd() like Bazel would in the runfiles + process.chdir(runfilesWorkspace); + // No first-party packages writeManifest({ 'bin': BIN_DIR, @@ -530,8 +533,10 @@ describe('link_node_modules', () => { 'RUNFILES_MANIFEST_FILE': 'runfiles.mf', })); - // The linker expects to run as its own process, so it changes the wd - process.chdir(path.join(process.env['TEST_TMPDIR']!, workspace)); - expect(fs.readdirSync(path.join('..', 'node_modules', 'some-package'))).toContain('index.js'); + // We expect the linker to symlink node_modules to `execroot/my_wksp/node_modules` when + // under runfiles + expect(fs.readdirSync( + path.join(process.env['TEST_TMPDIR']!, workspace, 'node_modules', 'some-package'))) + .toContain('index.js'); }); }); \ No newline at end of file diff --git a/internal/node/node.bzl b/internal/node/node.bzl index 3aee90702a..bee2b9edbc 100644 --- a/internal/node/node.bzl +++ b/internal/node/node.bzl @@ -178,7 +178,18 @@ def _nodejs_binary_impl(ctx): _write_require_patch_script(ctx) _write_loader_script(ctx) + # Provide the target name as an environment variable avaiable to all actions for the + # runfiles helpers to use. env_vars = "export BAZEL_TARGET=%s\n" % ctx.label + + # While we can derive the workspace from the pwd when running locally + # because it is in the execroot path `execroot/my_wksp`, on RBE the + # `execroot/my_wksp` path is reduced a path such as `/w/f/b` so + # the workspace name is obfuscated from the path. So we provide the workspace + # name here as an environment variable avaiable to all actions for the + # runfiles helpers to use. + env_vars += "export BAZEL_WORKSPACE=%s\n" % ctx.workspace_name + for k in ctx.attr.configuration_env_vars + ctx.attr.default_env_vars: # Check ctx.var first & if env var not in there then check # ctx.configuration.default_shell_env. The former will contain values from --define=FOO=BAR diff --git a/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash b/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash index 3837231381..a7fbb21584 100644 --- a/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash +++ b/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash @@ -135,12 +135,9 @@ function rlocation() { return 1 else # --- begin rules_nodejs custom code --- - # Bazel always sets the PWD to execroot/my_wksp so we derive the workspace - # from the PWD. - export bazel_workspace=$(basename $PWD) - # Normalize ${bazel_workspace}/$1. + # Normalize ${BAZEL_WORKSPACE}/$1. # If $1 is a $(rootpath) this will convert it to the runfiles manifest path - readonly from_rootpath=$(normpath ${bazel_workspace}/$1) + readonly from_rootpath=$(normpath ${BAZEL_WORKSPACE:-/dev/null}/$1) # --- end rules_nodejs custom code --- if [[ -e "${RUNFILES_DIR:-/dev/null}/$1" ]]; then if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then @@ -151,7 +148,7 @@ function rlocation() { # If $1 is a rootpath then check if the converted rootpath to runfiles manifest path file is found elif [[ -e "${RUNFILES_DIR:-/dev/null}/${from_rootpath}" ]]; then if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then - echo >&2 "INFO[runfiles.bash]: rlocation($1): found under RUNFILES_DIR/WKSP ($RUNFILES_DIR/$bazel_workspace), return" + echo >&2 "INFO[runfiles.bash]: rlocation($1): found under RUNFILES_DIR/BAZEL_WORKSPACE ($RUNFILES_DIR/$BAZEL_WORKSPACE), return" fi echo "${RUNFILES_DIR}/${from_rootpath}" # --- end rules_nodejs custom code ---