From bad474fa7d25306d90a5ee1d0f777872f4bff367 Mon Sep 17 00:00:00 2001 From: Greg Magolan Date: Wed, 8 Apr 2020 13:03:49 -0700 Subject: [PATCH] fix(builtin): always symlink node_modules at `execroot/my_wksp/node_modules` even when running in runfiles Under runfiles, the linker should symlink node_modules at `execroot/my_wksp` so that when there are no runfiles (default on Windows) and scripts run out of `execroot/my_wksp` they can resolve node_modules with standard node_module resolution Also, restore BAZEL_WORKSPACE name environment variable. The optimization of deriving the workspace name from the path does not work on RBE: --- internal/linker/index.js | 125 ++++++++------- internal/linker/link_node_modules.ts | 142 +++++++++++------- .../linker/test/link_node_modules.spec.ts | 17 ++- internal/node/node.bzl | 11 ++ .../bazel/tools/bash/runfiles/runfiles.bash | 9 +- 5 files changed, 184 insertions(+), 120 deletions(-) diff --git a/internal/linker/index.js b/internal/linker/index.js index 6af11a4b43..cdbc97ec91 100644 --- a/internal/linker/index.js +++ b/internal/linker/index.js @@ -106,25 +106,46 @@ Include as much of the build output as you can without disclosing anything confi }); } /** - * Resolve a root directory string to the actual location on disk - * where node_modules was installed - * @param root a string like 'npm/node_modules' + * Resolve to an absolute root node_modules directory. + * @param root The bazel managed node_modules root such as 'npm/node_modules', which includes the + * workspace name as the first segment. May be undefined if there are no third_party node_modules + * deps. + * @param startCwd The absolute path that bazel started the action at. + * @param isExecroot True if the action is run in the execroot, false if the action is run in + * runfiles root. + * @param runfiles The runfiles helper object. + * @return The absolute path on disk where node_modules was installed or if no third party + * node_modules are deps of the current target the returns the absolute path to + * `execroot/my_wksp/node_modules`. */ - function resolveRoot(root, runfiles) { + function resolveRoot(root, startCwd, isExecroot, runfiles) { return __awaiter(this, void 0, void 0, function* () { - if (!runfiles.execroot) { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - process.chdir('..'); - } - // create a node_modules directory if no root - // this will be the case if only first-party modules are installed + if (isExecroot) { + // Under execroot, the root will be under an external folder from the startCwd + // `execroot/my_wksp`. For example, `execroot/my_wksp/external/npm/node_modules`. If there is no + // root, which will be the case if there are no third-party modules dependencies for this + // target, set the root to `execroot/my_wksp/node_modules`. + return root ? `${startCwd}/external/${root}` : `${startCwd}/node_modules`; + } + // Under runfiles, the linker should symlink node_modules at `execroot/my_wksp` + // so that when there are no runfiles (default on Windows) and scripts run out of + // `execroot/my_wksp` they can resolve node_modules with standard node_module resolution + // Look for bazel-out which is used to determine the the path to `execroot/my_wksp`. This works in + // all cases including on rbe where the execroot is a path such as `/b/f/w`. For example, when in + // runfiles on rbe, bazel runs the process in a directory such as + // `/b/f/w/bazel-out/k8-fastbuild/bin/path/to/pkg/some_test.sh.runfiles/my_wksp`. From here we can + // determine the execroot `b/f/w` by finding the first instance of bazel-out. + const match = startCwd.match(/\/bazel-out\//); + if (!match) { + panic('No bazel-out folder found in path!'); + return ''; + } + const symlinkRoot = startCwd.slice(0, match.index); + process.chdir(symlinkRoot); if (!root) { - if (!(yield exists('node_modules'))) { - log_verbose('no third-party packages; mkdir node_modules in ', process.cwd()); - yield fs.promises.mkdir('node_modules'); - } - return 'node_modules'; + // If there is no root, which will be the case if there are no third-party modules dependencies + // for this target, set the root to `execroot/my_wksp/node_modules`. + return `${symlinkRoot}/node_modules`; } // If we got a runfilesManifest map, look through it for a resolution // This will happen if we are running a binary that had some npm packages @@ -132,15 +153,10 @@ Include as much of the build output as you can without disclosing anything confi const fromManifest = runfiles.lookupDirectory(root); if (fromManifest) return fromManifest; - if (runfiles.execroot) { - // Under execroot there is an external folder in the root which look - // like 'my_wksp/external/npm/node_modules' - return path.join('external', root); - } else { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - return root; + // Under runfiles, the root will be one folder up from the startCwd `runfiles/my_wksp`. + // This is true whether legacy external runfiles are on or off. + return path.resolve(`${startCwd}/../${root}`); } }); } @@ -172,9 +188,8 @@ Include as much of the build output as you can without disclosing anything confi If you want to test runfiles manifest behavior, add --spawn_strategy=standalone to the command line.`); } - // Bazel starts actions with pwd=execroot/my_wksp - this.workspaceDir = path.resolve('.'); - this.workspace = path.basename(this.workspaceDir); + // Bazel starts actions with pwd=execroot/my_wksp or pwd=runfiles/my_wksp + this.workspace = env['BAZEL_WORKSPACE'] || undefined; // If target is from an external workspace such as @npm//rollup/bin:rollup // resolvePackageRelative is not supported since package is in an external // workspace. @@ -183,9 +198,6 @@ Include as much of the build output as you can without disclosing anything confi // //path/to:target -> path/to this.package = target.split(':')[0].replace(/^\/\//, ''); } - // We can derive if the process is being run in the execroot - // if there is a bazel-out folder at the cwd. - this.execroot = existsSync('bazel-out'); } lookupDirectory(dir) { if (!this.manifest) @@ -237,10 +249,17 @@ Include as much of the build output as you can without disclosing anything confi throw new Error(`could not resolve modulePath ${modulePath}`); } resolveWorkspaceRelative(modulePath) { + if (!this.workspace) { + throw new Error('workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } return this.resolve(path.posix.join(this.workspace, modulePath)); } resolvePackageRelative(modulePath) { - if (!this.package) { + if (!this.workspace) { + throw new Error('workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } + // NB: this.package may be '' if at the root of the workspace + if (this.package === undefined) { throw new Error('package could not be determined from the environment; make sure BAZEL_TARGET is set'); } return this.resolve(path.posix.join(this.workspace, this.package, modulePath)); @@ -425,10 +444,24 @@ Include as much of the build output as you can without disclosing anything confi modules = modules || {}; log_verbose('manifest file', modulesManifest); log_verbose('manifest contents', JSON.stringify({ workspace, bin, root, modules }, null, 2)); - // NB: resolveRoot will change the cwd when under runfiles to the runfiles root - const rootDir = yield resolveRoot(root, runfiles); + // Bazel starts actions with pwd=execroot/my_wksp when under execroot or pwd=runfiles/my_wksp + // when under runfiles. + // Normalize the slashes in startCwd for easier matching and manipulation. + const startCwd = process.cwd().replace(/\\/g, '/'); + log_verbose('startCwd', startCwd); + // We can derive if the process is being run in the execroot if there is a bazel-out folder. + const isExecroot = existsSync(`${startCwd}/bazel-out`); + log_verbose('isExecroot', isExecroot.toString()); + // NB: resolveRoot will change the cwd when under runfiles to `execroot/my_wksp` + const rootDir = yield resolveRoot(root, startCwd, isExecroot, runfiles); log_verbose('resolved node_modules root', root, 'to', rootDir); log_verbose('cwd', process.cwd()); + // Create rootDir if it does not exists. This will be the case if there are no third-party deps + // for this target or if outside of the sandbox and there are no node_modules installed. + if (!(yield exists(rootDir))) { + log_verbose('no third-party packages; mkdir node_modules at ', root); + yield fs.promises.mkdir(rootDir); + } // Create the node_modules symlink to the node_modules root that node will resolve from yield symlink(rootDir, 'node_modules'); // Change directory to the node_modules root directory so that all subsequent @@ -440,30 +473,15 @@ Include as much of the build output as you can without disclosing anything confi yield mkdirp(path.dirname(m.name)); if (m.link) { const [root, modulePath] = m.link; - const externalPrefix = 'external/'; let target = ''; switch (root) { case 'execroot': - if (runfiles.execroot) { - target = path.posix.join(runfiles.workspaceDir, modulePath); - } - else { - // If under runfiles, convert from execroot path to runfiles path. - // First strip the bin portion if it exists: - let runfilesPath = modulePath; - if (runfilesPath.startsWith(`${bin}/`)) { - runfilesPath = runfilesPath.slice(bin.length + 1); - } - else if (runfilesPath === bin) { - runfilesPath = ''; - } - // Next replace `external/` with `../` if it exists: - if (runfilesPath.startsWith(externalPrefix)) { - runfilesPath = `../${runfilesPath.slice(externalPrefix.length)}`; - } - target = path.posix.join(runfiles.workspaceDir, runfilesPath); + if (isExecroot) { + target = `${startCwd}/${modulePath}`; + break; } - break; + // If under runfiles, the fall through to 'runfiles' case + // so that we handle case where there is only a MANIFEST file case 'runfiles': // Transform execroot path to the runfiles manifest path so that // it can be resolved with runfiles.resolve() @@ -471,6 +489,7 @@ Include as much of the build output as you can without disclosing anything confi if (runfilesPath.startsWith(`${bin}/`)) { runfilesPath = runfilesPath.slice(bin.length + 1); } + const externalPrefix = 'external/'; if (runfilesPath.startsWith(externalPrefix)) { runfilesPath = runfilesPath.slice(externalPrefix.length); } diff --git a/internal/linker/link_node_modules.ts b/internal/linker/link_node_modules.ts index 6391a0e4ea..c6bb959658 100644 --- a/internal/linker/link_node_modules.ts +++ b/internal/linker/link_node_modules.ts @@ -90,49 +90,71 @@ async function symlink(target: string, p: string): Promise { } /** - * Resolve a root directory string to the actual location on disk - * where node_modules was installed - * @param root a string like 'npm/node_modules' + * Resolve to an absolute root node_modules directory. + * @param root The bazel managed node_modules root such as 'npm/node_modules', which includes the + * workspace name as the first segment. May be undefined if there are no third_party node_modules + * deps. + * @param startCwd The absolute path that bazel started the action at. + * @param isExecroot True if the action is run in the execroot, false if the action is run in + * runfiles root. + * @param runfiles The runfiles helper object. + * @return The absolute path on disk where node_modules was installed or if no third party + * node_modules are deps of the current target the returns the absolute path to + * `execroot/my_wksp/node_modules`. */ -async function resolveRoot(root: string|undefined, runfiles: Runfiles) { - if (!runfiles.execroot) { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - process.chdir('..'); +async function resolveRoot( + root: string|undefined, startCwd: string, isExecroot: boolean, runfiles: Runfiles) { + if (isExecroot) { + // Under execroot, the root will be under an external folder from the startCwd + // `execroot/my_wksp`. For example, `execroot/my_wksp/external/npm/node_modules`. If there is no + // root, which will be the case if there are no third-party modules dependencies for this + // target, set the root to `execroot/my_wksp/node_modules`. + return root ? `${startCwd}/external/${root}` : `${startCwd}/node_modules`; } - // create a node_modules directory if no root - // this will be the case if only first-party modules are installed + + // Under runfiles, the linker should symlink node_modules at `execroot/my_wksp` + // so that when there are no runfiles (default on Windows) and scripts run out of + // `execroot/my_wksp` they can resolve node_modules with standard node_module resolution + + // Look for bazel-out which is used to determine the the path to `execroot/my_wksp`. This works in + // all cases including on rbe where the execroot is a path such as `/b/f/w`. For example, when in + // runfiles on rbe, bazel runs the process in a directory such as + // `/b/f/w/bazel-out/k8-fastbuild/bin/path/to/pkg/some_test.sh.runfiles/my_wksp`. From here we can + // determine the execroot `b/f/w` by finding the first instance of bazel-out. + const match = startCwd.match(/\/bazel-out\//); + if (!match) { + panic('No bazel-out folder found in path!') + return ''; + } + const symlinkRoot = startCwd.slice(0, match.index); + process.chdir(symlinkRoot); + if (!root) { - if (!await exists('node_modules')) { - log_verbose('no third-party packages; mkdir node_modules in ', process.cwd()); - await fs.promises.mkdir('node_modules'); - } - return 'node_modules'; + // If there is no root, which will be the case if there are no third-party modules dependencies + // for this target, set the root to `execroot/my_wksp/node_modules`. + return `${symlinkRoot}/node_modules` } // If we got a runfilesManifest map, look through it for a resolution // This will happen if we are running a binary that had some npm packages // "statically linked" into its runfiles const fromManifest = runfiles.lookupDirectory(root); - if (fromManifest) return fromManifest; - - if (runfiles.execroot) { - // Under execroot there is an external folder in the root which look - // like 'my_wksp/external/npm/node_modules' - return path.join('external', root); - } else { - // Under runfiles, the repository should be layed out in the parent directory - // since bazel sets our working directory to the repository where the build is happening - return root; + if (fromManifest) + return fromManifest; + else { + // Under runfiles, the root will be one folder up from the startCwd `runfiles/my_wksp`. + // This is true whether legacy external runfiles are on or off. + return path.resolve(`${startCwd}/../${root}`) } } export class Runfiles { manifest: Map|undefined; dir: string|undefined; - execroot: boolean; - workspace: string; - workspaceDir: string; + /** + * If the environment gives us enough hints, we can know the workspace name + */ + workspace: string|undefined; /** * If the environment gives us enough hints, we can know the package path */ @@ -164,9 +186,8 @@ export class Runfiles { If you want to test runfiles manifest behavior, add --spawn_strategy=standalone to the command line.`); } - // Bazel starts actions with pwd=execroot/my_wksp - this.workspaceDir = path.resolve('.'); - this.workspace = path.basename(this.workspaceDir); + // Bazel starts actions with pwd=execroot/my_wksp or pwd=runfiles/my_wksp + this.workspace = env['BAZEL_WORKSPACE'] || undefined; // If target is from an external workspace such as @npm//rollup/bin:rollup // resolvePackageRelative is not supported since package is in an external // workspace. @@ -175,9 +196,6 @@ export class Runfiles { // //path/to:target -> path/to this.package = target.split(':')[0].replace(/^\/\//, ''); } - // We can derive if the process is being run in the execroot - // if there is a bazel-out folder at the cwd. - this.execroot = existsSync('bazel-out'); } lookupDirectory(dir: string): string|undefined { @@ -236,11 +254,20 @@ export class Runfiles { } resolveWorkspaceRelative(modulePath: string) { + if (!this.workspace) { + throw new Error( + 'workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } return this.resolve(path.posix.join(this.workspace, modulePath)); } resolvePackageRelative(modulePath: string) { - if (!this.package) { + if (!this.workspace) { + throw new Error( + 'workspace could not be determined from the environment; make sure BAZEL_WORKSPACE is set'); + } + // NB: this.package may be '' if at the root of the workspace + if (this.package === undefined) { throw new Error( 'package could not be determined from the environment; make sure BAZEL_TARGET is set'); } @@ -485,11 +512,28 @@ export async function main(args: string[], runfiles: Runfiles) { log_verbose('manifest file', modulesManifest); log_verbose('manifest contents', JSON.stringify({workspace, bin, root, modules}, null, 2)); - // NB: resolveRoot will change the cwd when under runfiles to the runfiles root - const rootDir = await resolveRoot(root, runfiles); + // Bazel starts actions with pwd=execroot/my_wksp when under execroot or pwd=runfiles/my_wksp + // when under runfiles. + // Normalize the slashes in startCwd for easier matching and manipulation. + const startCwd = process.cwd().replace(/\\/g, '/'); + log_verbose('startCwd', startCwd); + + // We can derive if the process is being run in the execroot if there is a bazel-out folder. + const isExecroot = existsSync(`${startCwd}/bazel-out`); + log_verbose('isExecroot', isExecroot.toString()); + + // NB: resolveRoot will change the cwd when under runfiles to `execroot/my_wksp` + const rootDir = await resolveRoot(root, startCwd, isExecroot, runfiles); log_verbose('resolved node_modules root', root, 'to', rootDir); log_verbose('cwd', process.cwd()); + // Create rootDir if it does not exists. This will be the case if there are no third-party deps + // for this target or if outside of the sandbox and there are no node_modules installed. + if (!(await exists(rootDir))) { + log_verbose('no third-party packages; mkdir node_modules at ', root); + await fs.promises.mkdir(rootDir); + } + // Create the node_modules symlink to the node_modules root that node will resolve from await symlink(rootDir, 'node_modules'); @@ -503,28 +547,15 @@ export async function main(args: string[], runfiles: Runfiles) { if (m.link) { const [root, modulePath] = m.link; - const externalPrefix = 'external/'; let target: string = ''; switch (root) { case 'execroot': - if (runfiles.execroot) { - target = path.posix.join(runfiles.workspaceDir, modulePath); - } else { - // If under runfiles, convert from execroot path to runfiles path. - // First strip the bin portion if it exists: - let runfilesPath = modulePath; - if (runfilesPath.startsWith(`${bin}/`)) { - runfilesPath = runfilesPath.slice(bin.length + 1); - } else if (runfilesPath === bin) { - runfilesPath = ''; - } - // Next replace `external/` with `../` if it exists: - if (runfilesPath.startsWith(externalPrefix)) { - runfilesPath = `../${runfilesPath.slice(externalPrefix.length)}`; - } - target = path.posix.join(runfiles.workspaceDir, runfilesPath); + if (isExecroot) { + target = `${startCwd}/${modulePath}`; + break; } - break; + // If under runfiles, the fall through to 'runfiles' case + // so that we handle case where there is only a MANIFEST file case 'runfiles': // Transform execroot path to the runfiles manifest path so that // it can be resolved with runfiles.resolve() @@ -532,6 +563,7 @@ export async function main(args: string[], runfiles: Runfiles) { if (runfilesPath.startsWith(`${bin}/`)) { runfilesPath = runfilesPath.slice(bin.length + 1); } + const externalPrefix = 'external/'; if (runfilesPath.startsWith(externalPrefix)) { runfilesPath = runfilesPath.slice(externalPrefix.length); } else { diff --git a/internal/linker/test/link_node_modules.spec.ts b/internal/linker/test/link_node_modules.spec.ts index d6a93909c8..0efa722385 100644 --- a/internal/linker/test/link_node_modules.spec.ts +++ b/internal/linker/test/link_node_modules.spec.ts @@ -24,13 +24,15 @@ function writeRunfiles(manifest: string[]) { describe('link_node_modules', () => { let workspace: string; + let runfilesWorkspace: string; beforeEach(() => { process.chdir(process.env['TEST_TMPDIR']!); // Prevent test isolation failures: each spec gets its own workspace workspace = `wksp_${Date.now()}`; + runfilesWorkspace = `${workspace}/${BIN_DIR}/runfiles/${workspace}`; // Create our local workspace where the build is running - mkdirp(workspace); + mkdirp(runfilesWorkspace); }); function readWorkspaceNodeModules(...parts: string[]) { @@ -517,8 +519,9 @@ describe('link_node_modules', () => { fs.writeFileSync(idx, 'exports = {}', 'utf-8'); const runfilesManifest = [`${idx} ${path.resolve(idx)}`]; - // Set the cwd() like Bazel would in the execroot - process.chdir(workspace); + // Set the cwd() like Bazel would in the runfiles + process.chdir(runfilesWorkspace); + // No first-party packages writeManifest({ 'bin': BIN_DIR, @@ -530,8 +533,10 @@ describe('link_node_modules', () => { 'RUNFILES_MANIFEST_FILE': 'runfiles.mf', })); - // The linker expects to run as its own process, so it changes the wd - process.chdir(path.join(process.env['TEST_TMPDIR']!, workspace)); - expect(fs.readdirSync(path.join('..', 'node_modules', 'some-package'))).toContain('index.js'); + // We expect the linker to symlink node_modules to `execroot/my_wksp/node_modules` when + // under runfiles + expect(fs.readdirSync( + path.join(process.env['TEST_TMPDIR']!, workspace, 'node_modules', 'some-package'))) + .toContain('index.js'); }); }); \ No newline at end of file diff --git a/internal/node/node.bzl b/internal/node/node.bzl index 3aee90702a..bee2b9edbc 100644 --- a/internal/node/node.bzl +++ b/internal/node/node.bzl @@ -178,7 +178,18 @@ def _nodejs_binary_impl(ctx): _write_require_patch_script(ctx) _write_loader_script(ctx) + # Provide the target name as an environment variable avaiable to all actions for the + # runfiles helpers to use. env_vars = "export BAZEL_TARGET=%s\n" % ctx.label + + # While we can derive the workspace from the pwd when running locally + # because it is in the execroot path `execroot/my_wksp`, on RBE the + # `execroot/my_wksp` path is reduced a path such as `/w/f/b` so + # the workspace name is obfuscated from the path. So we provide the workspace + # name here as an environment variable avaiable to all actions for the + # runfiles helpers to use. + env_vars += "export BAZEL_WORKSPACE=%s\n" % ctx.workspace_name + for k in ctx.attr.configuration_env_vars + ctx.attr.default_env_vars: # Check ctx.var first & if env var not in there then check # ctx.configuration.default_shell_env. The former will contain values from --define=FOO=BAR diff --git a/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash b/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash index 3837231381..a7fbb21584 100644 --- a/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash +++ b/third_party/github.com/bazelbuild/bazel/tools/bash/runfiles/runfiles.bash @@ -135,12 +135,9 @@ function rlocation() { return 1 else # --- begin rules_nodejs custom code --- - # Bazel always sets the PWD to execroot/my_wksp so we derive the workspace - # from the PWD. - export bazel_workspace=$(basename $PWD) - # Normalize ${bazel_workspace}/$1. + # Normalize ${BAZEL_WORKSPACE}/$1. # If $1 is a $(rootpath) this will convert it to the runfiles manifest path - readonly from_rootpath=$(normpath ${bazel_workspace}/$1) + readonly from_rootpath=$(normpath ${BAZEL_WORKSPACE:-/dev/null}/$1) # --- end rules_nodejs custom code --- if [[ -e "${RUNFILES_DIR:-/dev/null}/$1" ]]; then if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then @@ -151,7 +148,7 @@ function rlocation() { # If $1 is a rootpath then check if the converted rootpath to runfiles manifest path file is found elif [[ -e "${RUNFILES_DIR:-/dev/null}/${from_rootpath}" ]]; then if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then - echo >&2 "INFO[runfiles.bash]: rlocation($1): found under RUNFILES_DIR/WKSP ($RUNFILES_DIR/$bazel_workspace), return" + echo >&2 "INFO[runfiles.bash]: rlocation($1): found under RUNFILES_DIR/BAZEL_WORKSPACE ($RUNFILES_DIR/$BAZEL_WORKSPACE), return" fi echo "${RUNFILES_DIR}/${from_rootpath}" # --- end rules_nodejs custom code ---