Skip to content

Commit

Permalink
module: detect ESM syntax by trying to recompile as SourceTextModule
Browse files Browse the repository at this point in the history
Instead of using an async function wrapper, just try compiling code with
unknown module format as SourceTextModule when it cannot be compiled
as CJS and the error message indicates that it's worth a retry. If
it can be parsed as SourceTextModule then it's considered ESM.

Also, move shouldRetryAsESM() to C++ completely so that
we can reuse it in the CJS module loader for require(esm).

Drive-by: move methods that don't belong to ContextifyContext
out as static methods and move GetHostDefinedOptions to
ModuleWrap.
  • Loading branch information
joyeecheung committed Apr 7, 2024
1 parent 756acd0 commit 179dabd
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 337 deletions.
4 changes: 2 additions & 2 deletions lib/internal/modules/esm/get_format.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
if (getOptionValue('--experimental-detect-module')) {
const format = source ?
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
null;
if (format === 'module') {
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
Expand Down Expand Up @@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
if (!source) { return null; }
const format = getFormatOfExtensionlessFile(url);
if (format === 'module') {
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
}
return format;
}
Expand Down
35 changes: 0 additions & 35 deletions lib/internal/modules/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,6 @@ const {
} = require('internal/errors').codes;
const { BuiltinModule } = require('internal/bootstrap/realm');

const {
shouldRetryAsESM: contextifyShouldRetryAsESM,
constants: {
syntaxDetectionErrors: {
esmSyntaxErrorMessages,
throwsOnlyInCommonJSErrorMessages,
},
},
} = internalBinding('contextify');
const { validateString } = require('internal/validators');
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
const internalFS = require('internal/fs/utils');
Expand Down Expand Up @@ -329,31 +320,6 @@ function normalizeReferrerURL(referrerName) {
}


let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
/**
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
* @param {string} source Module contents
*/
function shouldRetryAsESM(errorMessage, source) {
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
return true;
}

throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
}

return false;
}


// Whether we have started executing any user-provided CJS code.
// This is set right before we call the wrapped CJS code (not after,
// in case we are half-way in the execution when internals check this).
Expand All @@ -373,7 +339,6 @@ module.exports = {
loadBuiltinModule,
makeRequireFunction,
normalizeReferrerURL,
shouldRetryAsESM,
stripBOM,
toRealPath,
hasStartedUserCJSExecution() {
Expand Down
26 changes: 16 additions & 10 deletions lib/internal/modules/run_main.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
'use strict';

const {
ObjectGetPrototypeOf,
SyntaxErrorPrototype,
StringPrototypeEndsWith,
globalThis,
} = primordials;
Expand Down Expand Up @@ -159,27 +161,29 @@ function runEntryPointWithESMLoader(callback) {
function executeUserEntryPoint(main = process.argv[1]) {
const resolvedMain = resolveMainPath(main);
const useESMLoader = shouldUseESMLoader(resolvedMain);

let mainURL;
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
let retryAsESM = false;
if (!useESMLoader) {
const cjsLoader = require('internal/modules/cjs/loader');
const { Module } = cjsLoader;
if (getOptionValue('--experimental-detect-module')) {
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
try {
// Module._load is the monkey-patchable CJS module loader.
Module._load(main, null, true);
} catch (error) {
const source = cjsLoader.entryPointSource;
const { shouldRetryAsESM } = require('internal/modules/helpers');
retryAsESM = shouldRetryAsESM(error.message, source);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
if (ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
const { shouldRetryAsESM } = internalBinding('contextify');
const mainPath = resolvedMain || main;
mainURL = pathToFileURL(mainPath).href;
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainPath);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
}
if (!retryAsESM) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(error, source, resolvedMain);
throw error;
}
}
Expand All @@ -190,7 +194,9 @@ function executeUserEntryPoint(main = process.argv[1]) {

if (useESMLoader || retryAsESM) {
const mainPath = resolvedMain || main;
const mainURL = pathToFileURL(mainPath).href;
if (mainURL === undefined) {
mainURL = pathToFileURL(mainPath).href;
}

runEntryPointWithESMLoader((cascadedLoader) => {
// Note that if the graph contains unsettled TLA, this may never resolve
Expand Down
115 changes: 82 additions & 33 deletions src/module_wrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,18 @@ v8::Maybe<bool> ModuleWrap::CheckUnsettledTopLevelAwait() {
return v8::Just(false);
}

// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
Isolate* isolate, Local<Symbol> id_symbol) {
Local<PrimitiveArray> host_defined_options =
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
return host_defined_options;
}

// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
// new ModuleWrap(url, context, source, lineOffset, columOffset,
// hostDefinedOption) new ModuleWrap(url, context, exportNames,
// syntheticExecutionFunction)
// idSymbol);
// new ModuleWrap(url, context, exportNames, syntheticExecutionFunction)
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
CHECK(args.IsConstructCall());
CHECK_GE(args.Length(), 3);
Expand Down Expand Up @@ -174,17 +182,16 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
int column_offset = 0;

bool synthetic = args[2]->IsArray();

Local<PrimitiveArray> host_defined_options =
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
Local<PrimitiveArray> host_defined_options;
Local<Symbol> id_symbol;
if (synthetic) {
// new ModuleWrap(url, context, exportNames, syntheticExecutionFunction)
CHECK(args[3]->IsFunction());
} else {
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
// cachedData]);
// new ModuleWrap(url, context, source, lineOffset, columOffset,
// hostDefinedOption)
// idSymbol);
CHECK(args[2]->IsString());
CHECK(args[3]->IsNumber());
line_offset = args[3].As<Int32>()->Value();
Expand All @@ -195,7 +202,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
} else {
id_symbol = Symbol::New(isolate, url);
}
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);

if (that->SetPrivate(context,
realm->isolate_data()->host_defined_option_symbol(),
Expand Down Expand Up @@ -230,36 +237,32 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
module = Module::CreateSyntheticModule(
isolate, url, span, SyntheticModuleEvaluationStepsCallback);
} else {
ScriptCompiler::CachedData* cached_data = nullptr;
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
if (id_symbol !=
realm->isolate_data()->source_text_module_default_hdo()) {
// TODO(joyeecheung): when we are compiling for the default loader, this
// will be std::nullopt, and CompileSourceTextModule() should use
// on-disk cache. See: https://github.com/nodejs/node/issues/47472
user_cached_data = nullptr;
}
if (args[5]->IsArrayBufferView()) {
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
uint8_t* data =
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
cached_data =
user_cached_data =
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
cached_data_buf->ByteLength());
}

Local<String> source_text = args[2].As<String>();
ScriptOrigin origin(isolate,
url,
line_offset,
column_offset,
true, // is cross origin
-1, // script id
Local<Value>(), // source map URL
false, // is opaque (?)
false, // is WASM
true, // is ES Module
host_defined_options);
ScriptCompiler::Source source(source_text, origin, cached_data);
ScriptCompiler::CompileOptions options;
if (source.GetCachedData() == nullptr) {
options = ScriptCompiler::kNoCompileOptions;
} else {
options = ScriptCompiler::kConsumeCodeCache;
}
if (!ScriptCompiler::CompileModule(isolate, &source, options)
bool cache_rejected = false;
if (!CompileSourceTextModule(realm,
source_text,
url,
line_offset,
column_offset,
host_defined_options,
user_cached_data,
&cache_rejected)
.ToLocal(&module)) {
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
CHECK(!try_catch.Message().IsEmpty());
Expand All @@ -272,8 +275,9 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
}
return;
}
if (options == ScriptCompiler::kConsumeCodeCache &&
source.GetCachedData()->rejected) {

if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
cache_rejected) {
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
realm, "cachedData buffer was rejected");
try_catch.ReThrow();
Expand Down Expand Up @@ -310,6 +314,51 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(that);
}

MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
Realm* realm,
Local<String> source_text,
Local<String> url,
int line_offset,
int column_offset,
Local<PrimitiveArray> host_defined_options,
std::optional<ScriptCompiler::CachedData*> user_cached_data,
bool* cache_rejected) {
Isolate* isolate = realm->isolate();
EscapableHandleScope scope(isolate);
ScriptOrigin origin(isolate,
url,
line_offset,
column_offset,
true, // is cross origin
-1, // script id
Local<Value>(), // source map URL
false, // is opaque (?)
false, // is WASM
true, // is ES Module
host_defined_options);
ScriptCompiler::CachedData* cached_data = nullptr;
if (user_cached_data.has_value()) {
cached_data = user_cached_data.value();
}
ScriptCompiler::Source source(source_text, origin, cached_data);
ScriptCompiler::CompileOptions options;
if (cached_data == nullptr) {
options = ScriptCompiler::kNoCompileOptions;
} else {
options = ScriptCompiler::kConsumeCodeCache;
}
Local<Module> module;
if (!ScriptCompiler::CompileModule(isolate, &source, options)
.ToLocal(&module)) {
return scope.EscapeMaybe(MaybeLocal<Module>());
}
if (cached_data != nullptr) {
*cache_rejected = source.GetCachedData()->rejected;
}

return scope.Escape(module);
}

static Local<Object> createImportAttributesContainer(
Realm* realm,
Isolate* isolate,
Expand Down
21 changes: 20 additions & 1 deletion src/module_wrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <unordered_map>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
#include "base_object.h"
#include "v8-script.h"

namespace node {

Expand Down Expand Up @@ -69,6 +71,23 @@ class ModuleWrap : public BaseObject {
return true;
}

static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);

// When user_cached_data is not std::nullopt, use the code cache if it's not
// nullptr, otherwise don't use code cache.
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
// See: https://github.com/nodejs/node/issues/47472
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
Realm* realm,
v8::Local<v8::String> source_text,
v8::Local<v8::String> url,
int line_offset,
int column_offset,
v8::Local<v8::PrimitiveArray> host_defined_options,
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
bool* cache_rejected);

private:
ModuleWrap(Realm* realm,
v8::Local<v8::Object> object,
Expand Down
Loading

0 comments on commit 179dabd

Please sign in to comment.