Skip to content

Commit

Permalink
url: refactor pathToFileURL to native
Browse files Browse the repository at this point in the history
PR-URL: #55476
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
  • Loading branch information
aduh95 authored and ruyadorno committed Nov 27, 2024
1 parent 7acb963 commit 6317f77
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 77 deletions.
92 changes: 15 additions & 77 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ const {
ObjectSetPrototypeOf,
ReflectGetOwnPropertyDescriptor,
ReflectOwnKeys,
RegExpPrototypeSymbolReplace,
SafeMap,
SafeSet,
StringPrototypeCharAt,
Expand Down Expand Up @@ -779,6 +778,8 @@ function isURL(self) {
* for invalid URL inputs.
*/
const kParseURLSymbol = Symbol('kParseURL');
// Sentinels passed as the third argument of the `URL` constructor by
// `pathToFileURL()`. When the constructor receives one of them it calls
// `bindingUrl.pathToFileURL()` instead of `bindingUrl.parse()`, treating the
// input as a POSIX path or as a Windows path respectively.
const kCreateURLFromPosixPathSymbol = Symbol('kCreateURLFromPosixPath');
const kCreateURLFromWindowsPathSymbol = Symbol('kCreateURLFromWindowsPath');

class URL {
#context = new URLContext();
Expand Down Expand Up @@ -812,8 +813,17 @@ class URL {
base = `${base}`;
}

const raiseException = parseSymbol !== kParseURLSymbol;
const href = bindingUrl.parse(input, base, raiseException);
let href;
if (arguments.length < 3) {
href = bindingUrl.parse(input, base, true);
} else {
const raiseException = parseSymbol !== kParseURLSymbol;
const interpretAsWindowsPath = parseSymbol === kCreateURLFromWindowsPathSymbol;
const pathToFileURL = interpretAsWindowsPath || (parseSymbol === kCreateURLFromPosixPathSymbol);
href = pathToFileURL ?
bindingUrl.pathToFileURL(input, interpretAsWindowsPath, base) :
bindingUrl.parse(input, base, raiseException);
}
if (href) {
this.#updateContext(href);
}
Expand Down Expand Up @@ -1500,76 +1510,9 @@ function fileURLToPath(path, options = kEmptyObject) {
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
}

// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
// Every pattern matches one literal character and carries the `g` flag so
// that a single replace call rewrites all of its occurrences. They are
// consumed by `encodePathChars()`; `backslashRegEx` is additionally used by
// `pathToFileURL()` to turn UNC path separators into forward slashes.
const percentRegEx = /%/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;
const quoteRegEx = /"/g;
const hashRegex = /#/g;
const spaceRegEx = / /g;
const questionMarkRegex = /\?/g;
const openSquareBracketRegEx = /\[/g;
const backslashRegEx = /\\/g;
const closeSquareBracketRegEx = /]/g;
const caretRegEx = /\^/g;
const verticalBarRegEx = /\|/g;
const tildeRegEx = /~/g;

/**
 * Percent-encodes the characters of a file path that must not appear
 * verbatim in a `file:` URL.
 * @param {string} filepath The path to encode.
 * @param {{ windows?: boolean }} [options] When `options.windows` is truthy,
 *   backslashes are left untouched because they act as path separators there.
 * @returns {string} The path with every unsafe character replaced by its
 *   `%XX` escape.
 */
function encodePathChars(filepath, options = kEmptyObject) {
  // Only run the regular expression when the character is actually present.
  const escapeChar = (str, char, regex, escaped) => (
    StringPrototypeIncludes(str, char) ?
      RegExpPrototypeSymbolReplace(regex, str, escaped) :
      str
  );

  // '%' must be handled first: every later step introduces new '%' characters
  // that must not be escaped a second time.
  let encoded = escapeChar(filepath, '%', percentRegEx, '%25');

  encoded = escapeChar(encoded, '\t', tabRegEx, '%09');
  encoded = escapeChar(encoded, '\n', newlineRegEx, '%0A');
  encoded = escapeChar(encoded, '\r', carriageReturnRegEx, '%0D');
  encoded = escapeChar(encoded, ' ', spaceRegEx, '%20');
  encoded = escapeChar(encoded, '"', quoteRegEx, '%22');
  encoded = escapeChar(encoded, '#', hashRegex, '%23');
  encoded = escapeChar(encoded, '?', questionMarkRegex, '%3F');
  encoded = escapeChar(encoded, '[', openSquareBracketRegEx, '%5B');
  // Back-slashes must be special-cased on Windows, where they are treated as
  // path separator.
  if (!options.windows) {
    encoded = escapeChar(encoded, '\\', backslashRegEx, '%5C');
  }
  encoded = escapeChar(encoded, ']', closeSquareBracketRegEx, '%5D');
  encoded = escapeChar(encoded, '^', caretRegEx, '%5E');
  encoded = escapeChar(encoded, '|', verticalBarRegEx, '%7C');
  encoded = escapeChar(encoded, '~', tildeRegEx, '%7E');

  return encoded;
}

function pathToFileURL(filepath, options = kEmptyObject) {
const windows = options?.windows ?? isWindows;
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) {
const outURL = new URL('file://');
// UNC path format: \\server\share\resource
// Handle extended UNC path and standard UNC path
// "\\?\UNC\" path prefix should be ignored.
Expand All @@ -1592,12 +1535,7 @@ function pathToFileURL(filepath, options = kEmptyObject) {
);
}
const hostname = StringPrototypeSlice(filepath, prefixLength, hostnameEndIndex);
outURL.hostname = domainToASCII(hostname);
outURL.pathname = encodePathChars(
RegExpPrototypeSymbolReplace(backslashRegEx, StringPrototypeSlice(filepath, hostnameEndIndex), '/'),
{ windows },
);
return outURL;
return new URL(StringPrototypeSlice(filepath, hostnameEndIndex), hostname, kCreateURLFromWindowsPathSymbol);
}
let resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath);
// path.resolve strips trailing slashes so we must add them back
Expand All @@ -1608,7 +1546,7 @@ function pathToFileURL(filepath, options = kEmptyObject) {
resolved[resolved.length - 1] !== path.sep)
resolved += '/';

return new URL(`file://${encodePathChars(resolved, { windows })}`);
return new URL(resolved, undefined, windows ? kCreateURLFromWindowsPathSymbol : kCreateURLFromPosixPathSymbol);
}

function toPathIfFileURL(fileURLOrPath) {
Expand Down
104 changes: 104 additions & 0 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,108 @@ void BindingData::Deserialize(v8::Local<v8::Context> context,
CHECK_NOT_NULL(binding);
}

#ifndef LARGEST_ASCII_CHAR_CODE_TO_ENCODE
#define LARGEST_ASCII_CHAR_CODE_TO_ENCODE '~'
#endif

// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
//
// Compile-time table indexed by character code, covering codes 0 through
// LARGEST_ASCII_CHAR_CODE_TO_ENCODE. Each entry is a NUL-terminated string:
// "%XX" (uppercase hex) for the characters listed below, and the character
// itself for every other code.
constexpr auto lookup_table = []() consteval {
  // Room for '%' + two hex digits + the terminating NUL.
  std::array<std::array<char, 4>, LARGEST_ASCII_CHAR_CODE_TO_ENCODE + 1>
      table{};

  // Start from the identity mapping...
  for (size_t code = 0; code < table.size(); ++code) {
    table[code] = {{static_cast<char>(code), '\0', '\0', '\0'}};
  }

  // ...then overwrite the entries of the characters that must be escaped.
  constexpr char kUpperHex[] = "0123456789ABCDEF";
  for (const char unsafe : {'\0',
                            '\t',
                            '\n',
                            '\r',
                            ' ',
                            '"',
                            '#',
                            '%',
                            '?',
                            '[',
                            '\\',
                            ']',
                            '^',
                            '|',
                            '~'}) {
    const auto code = static_cast<uint8_t>(unsafe);
    // Characters beyond the configured upper bound have no slot in the table.
    if (code < table.size()) {
      table[code] = {
          {'%', kUpperHex[code >> 4], kUpperHex[code & 0x0F], '\0'}};
    }
  }

  return table;
}();

// Selects which path conventions EncodePathChars() and PathToFileURL() apply
// to their input.
enum class OS {
  WINDOWS,  // Backslashes are path separators.
  POSIX,    // Backslashes are ordinary characters.
};

// Returns "file://" followed by `input_str` with its unsafe characters
// percent-encoded through `lookup_table`.
//
// Bytes above LARGEST_ASCII_CHAR_CODE_TO_ENCODE are copied through unchanged.
// When `operating_system` is OS::WINDOWS, backslashes are emitted as '/'
// instead of being percent-encoded.
std::string EncodePathChars(std::string_view input_str, OS operating_system) {
  constexpr std::string_view kScheme = "file://";
  const bool windows = operating_system == OS::WINDOWS;

  std::string url(kScheme);
  // Lower bound only: every escaped character expands to three bytes.
  url.reserve(kScheme.size() + input_str.size());

  for (const char ch : input_str) {
    const auto code = static_cast<unsigned char>(ch);
    if (code > LARGEST_ASCII_CHAR_CODE_TO_ENCODE) [[unlikely]] {
      url.push_back(ch);
    } else if (windows && ch == '\\') {
      url.push_back('/');
    } else {
      url.append(lookup_table[code].data());
    }
  }

  return url;
}

void BindingData::PathToFileURL(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 2); // input
CHECK(args[0]->IsString());
CHECK(args[1]->IsBoolean());

Realm* realm = Realm::GetCurrent(args);
BindingData* binding_data = realm->GetBindingData<BindingData>();
Isolate* isolate = realm->isolate();
OS os = args[1]->IsTrue() ? OS::WINDOWS : OS::POSIX;

Utf8Value input(isolate, args[0]);
auto input_str = input.ToStringView();
CHECK(!input_str.empty());

auto out =
ada::parse<ada::url_aggregator>(EncodePathChars(input_str, os), nullptr);

if (!out) {
return ThrowInvalidURL(realm->env(), input.ToStringView(), nullptr);
}

if (os == OS::WINDOWS && args.Length() > 2 && !args[2]->IsUndefined())
[[unlikely]] {
CHECK(args[2]->IsString());
Utf8Value hostname(isolate, args[2]);
CHECK(out->set_hostname(hostname.ToStringView()));
}

binding_data->UpdateComponents(out->get_components(), out->type);

args.GetReturnValue().Set(
ToV8Value(realm->context(), out->get_href(), isolate).ToLocalChecked());
}

void BindingData::DomainToASCII(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_GE(args.Length(), 1); // input
Expand Down Expand Up @@ -371,6 +473,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
SetMethodNoSideEffect(isolate, target, "format", Format);
SetMethodNoSideEffect(isolate, target, "getOrigin", GetOrigin);
SetMethod(isolate, target, "parse", Parse);
SetMethod(isolate, target, "pathToFileURL", PathToFileURL);
SetMethod(isolate, target, "update", Update);
SetFastMethodNoSideEffect(
isolate, target, "canParse", CanParse, {fast_can_parse_methods_, 2});
Expand All @@ -391,6 +494,7 @@ void BindingData::RegisterExternalReferences(
registry->Register(Format);
registry->Register(GetOrigin);
registry->Register(Parse);
registry->Register(PathToFileURL);
registry->Register(Update);
registry->Register(CanParse);
registry->Register(FastCanParse);
Expand Down
1 change: 1 addition & 0 deletions src/node_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class BindingData : public SnapshotableObject {
static void Format(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetOrigin(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Parse(const v8::FunctionCallbackInfo<v8::Value>& args);
static void PathToFileURL(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Update(const v8::FunctionCallbackInfo<v8::Value>& args);

static void CreatePerIsolateProperties(IsolateData* isolate_data,
Expand Down

0 comments on commit 6317f77

Please sign in to comment.