Skip to content

Commit

Permalink
add more functions for node:url
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Oct 17, 2024
1 parent b51a583 commit 1b00b96
Show file tree
Hide file tree
Showing 7 changed files with 717 additions and 18 deletions.
30 changes: 30 additions & 0 deletions src/node/internal/internal_errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,36 @@ export class ERR_ZLIB_INITIALIZATION_FAILED extends NodeError {
}
}

export class ERR_INVALID_URL extends NodeError {
input: string;

constructor(url: string) {
super('ERR_INVALID_URL', 'Invalid URL');
this.input = url;
}
}

export class ERR_INVALID_URL_SCHEME extends NodeError {
constructor(scheme: string) {
super('ERR_INVALID_URL_SCHEME', `The URL must be of scheme ${scheme}`);
}
}

export class ERR_INVALID_FILE_URL_HOST extends NodeError {
constructor(input: string) {
super(
'ERR_INVALID_FILE_URL_HOST',
`File URL host must be "localhost" or empty on ${input}`
);
}
}

export class ERR_INVALID_FILE_URL_PATH extends NodeError {
constructor(input: string) {
super('ERR_INVALID_FILE_URL_PATH', `File URL path ${input}`);
}
}

export function aggregateTwoErrors(innerError: any, outerError: any) {
if (innerError && outerError && innerError !== outerError) {
if (Array.isArray(outerError.errors)) {
Expand Down
80 changes: 68 additions & 12 deletions src/node/internal/internal_path.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1071,21 +1071,37 @@ const posix = {
resolve(...args: string[]): string {
let resolvedPath = '';
let resolvedAbsolute = false;
let slashCheck = false;

for (let i = args.length - 1; i >= -1 && !resolvedAbsolute; i--) {
const path = i >= 0 ? args[i] : '/';

validateString(path, 'path');
for (let i = args.length - 1; i >= 0 && !resolvedAbsolute; i--) {
const path = args[i];
validateString(path, `paths[${i}]`);

// Skip empty entries
if (path.length === 0) {
continue;
}
if (
i === args.length - 1 &&
isPosixPathSeparator(path.charCodeAt(path.length - 1))
) {
slashCheck = true;
}

resolvedPath = `${path}/${resolvedPath}`;
if (resolvedPath.length !== 0) {
resolvedPath = `${path}/${resolvedPath}`;
} else {
resolvedPath = path;
}
resolvedAbsolute = path.charCodeAt(0) === CHAR_FORWARD_SLASH;
}

if (!resolvedAbsolute) {
const cwd = '/';
resolvedPath = `${cwd}/${resolvedPath}`;
resolvedAbsolute = cwd.charCodeAt(0) === CHAR_FORWARD_SLASH;
}

// At this point the path should be resolved to a full absolute path, but
// handle relative paths to be safe (might happen when process.cwd() fails)

Expand All @@ -1097,10 +1113,20 @@ const posix = {
isPosixPathSeparator
);

if (resolvedAbsolute) {
return `/${resolvedPath}`;
if (!resolvedAbsolute) {
if (resolvedPath.length === 0) {
return '.';
}
if (slashCheck) {
return `${resolvedPath}/`;
}
return resolvedPath;
}

if (resolvedPath.length === 0 || resolvedPath === '/') {
return '/';
}
return resolvedPath.length > 0 ? resolvedPath : '.';
return slashCheck ? `/${resolvedPath}/` : `/${resolvedPath}`;
},

/**
Expand Down Expand Up @@ -1173,11 +1199,41 @@ const posix = {

if (from === to) return '';

const fromStart = 1;
const fromEnd = from.length;
// Trim any leading slashes
let fromStart = 0;
while (
fromStart < from.length &&
from.charCodeAt(fromStart) === CHAR_FORWARD_SLASH
) {
fromStart++;
}
// Trim trailing slashes
let fromEnd = from.length;
while (
fromEnd - 1 > fromStart &&
from.charCodeAt(fromEnd - 1) === CHAR_FORWARD_SLASH
) {
fromEnd--;
}
const fromLen = fromEnd - fromStart;
const toStart = 1;
const toLen = to.length - toStart;

// Trim any leading slashes
let toStart = 0;
while (
toStart < to.length &&
to.charCodeAt(toStart) === CHAR_FORWARD_SLASH
) {
toStart++;
}
// Trim trailing slashes
let toEnd = to.length;
while (
toEnd - 1 > toStart &&
to.charCodeAt(toEnd - 1) === CHAR_FORWARD_SLASH
) {
toEnd--;
}
const toLen = toEnd - toStart;

// Compare paths to find the longest common path from root
const length = fromLen < toLen ? fromLen : toLen;
Expand Down
226 changes: 226 additions & 0 deletions src/node/internal/internal_url.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
// Copyright (c) 2017-2022 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0
// Copyright Joyent and Node contributors. All rights reserved. MIT license.

import {
ERR_INVALID_FILE_URL_HOST,
ERR_INVALID_FILE_URL_PATH,
ERR_INVALID_ARG_TYPE,
ERR_INVALID_ARG_VALUE,
ERR_INVALID_URL_SCHEME,
} from 'node-internal:internal_errors';
import { default as urlUtil } from 'node-internal:url';
import { CHAR_LOWERCASE_A, CHAR_LOWERCASE_Z } from 'node-internal:constants';
import {
win32 as pathWin32,
posix as pathPosix,
} from 'node-internal:internal_path';

const FORWARD_SLASH = /\//g;

// The following characters are percent-encoded when converting from file path
// to URL:
// - %: The percent character is the only character not encoded by the
// `pathname` setter.
// - \: Backslash is encoded on non-windows platforms since it's a valid
// character but the `pathname` setters replaces it by a forward slash.
// - LF: The newline character is stripped out by the `pathname` setter.
// (See whatwg/url#419)
// - CR: The carriage return character is also stripped out by the `pathname`
// setter.
// - TAB: The tab character is also stripped out by the `pathname` setter.
const percentRegEx = /%/g;
const backslashRegEx = /\\/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;
const questionRegex = /\?/g;
const hashRegex = /#/g;

function encodePathChars(
filepath: string,
options?: { windows?: boolean | undefined }
): string {
const windows = options?.windows;
if (filepath.indexOf('%') !== -1)
filepath = filepath.replace(percentRegEx, '%25');
// In posix, backslash is a valid character in paths:
if (!windows && filepath.indexOf('\\') !== -1)
filepath = filepath.replace(backslashRegEx, '%5C');
if (filepath.indexOf('\n') !== -1)
filepath = filepath.replace(newlineRegEx, '%0A');
if (filepath.indexOf('\r') !== -1)
filepath = filepath.replace(carriageReturnRegEx, '%0D');
if (filepath.indexOf('\t') !== -1)
filepath = filepath.replace(tabRegEx, '%09');
return filepath;
}

/**
* Checks if a value has the shape of a WHATWG URL object.
*
* Using a symbol or instanceof would not be able to recognize URL objects
* coming from other implementations (e.g. in Electron), so instead we are
* checking some well known properties for a lack of a better test.
*
* We use `href` and `protocol` as they are the only properties that are
* easy to retrieve and calculate due to the lazy nature of the getters.
*
* We check for `auth` and `path` attribute to distinguish legacy url instance with
* WHATWG URL instance.
*/
/* eslint-disable */
export function isURL(self?: any): self is URL {
return Boolean(
self?.href &&
self.protocol &&
self.auth === undefined &&
self.path === undefined
);
}
/* eslint-enable */

export function getPathFromURLPosix(url: URL): string {
if (url.hostname !== '') {
// Note: Difference between Node.js and Workerd.
// Node.js uses `process.platform` whereas workerd hard codes it to linux.
// This is done to avoid confusion regarding non-linux support and conformance.
throw new ERR_INVALID_FILE_URL_HOST('linux');
}
const pathname = url.pathname;
for (let n = 0; n < pathname.length; n++) {
if (pathname[n] === '%') {
// eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
const third = pathname.codePointAt(n + 2) as number | 0x20;
if (pathname[n + 1] === '2' && third === 102) {
throw new ERR_INVALID_FILE_URL_PATH(
'must not include encoded / characters'
);
}
}
}
return decodeURIComponent(pathname);
}

export function getPathFromURLWin32(url: URL): string {
const hostname = url.hostname;
let pathname = url.pathname;
for (let n = 0; n < pathname.length; n++) {
if (pathname[n] === '%') {
// eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
const third = pathname.codePointAt(n + 2) as number | 0x20;
if (
(pathname[n + 1] === '2' && third === 102) || // 2f 2F /
(pathname[n + 1] === '5' && third === 99)
) {
// 5c 5C \
throw new ERR_INVALID_FILE_URL_PATH(
'must not include encoded \\ or / characters'
);
}
}
}
pathname = pathname.replace(FORWARD_SLASH, '\\');
pathname = decodeURIComponent(pathname);
if (hostname !== '') {
// If hostname is set, then we have a UNC path
// Pass the hostname through domainToUnicode just in case
// it is an IDN using punycode encoding. We do not need to worry
// about percent encoding because the URL parser will have
// already taken care of that for us. Note that this only
// causes IDNs with an appropriate `xn--` prefix to be decoded.
return `\\\\${urlUtil.domainToUnicode(hostname)}${pathname}`;
}
// Otherwise, it's a local path that requires a drive letter
// eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
const letter = pathname.codePointAt(1) as number | 0x20;
const sep = pathname.charAt(2);
if (
letter < CHAR_LOWERCASE_A ||
letter > CHAR_LOWERCASE_Z || // a..z A..Z
sep !== ':'
) {
throw new ERR_INVALID_FILE_URL_PATH('must be absolute');
}
return pathname.slice(1);
}

export function fileURLToPath(
input: string | URL,
options?: { windows?: boolean }
): string {
const windows = options?.windows;
let path: URL;
if (typeof input === 'string') {
path = new URL(input);
} else if (!isURL(input)) {
throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], input);
} else {
path = input;
}
if (path.protocol !== 'file:') {
throw new ERR_INVALID_URL_SCHEME('file');
}
return windows ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
}

export function pathToFileURL(
filepath: string,
options?: { windows?: boolean }
): URL {
const windows = options?.windows;
// IMPORTANT: Difference between Node.js and workerd.
// The following check does not exist in Node.js due to primordial usage.
if (typeof filepath !== 'string') {
throw new ERR_INVALID_ARG_TYPE('filepath', 'string', filepath);
}
if (windows && filepath.startsWith('\\\\')) {
const outURL = new URL('file://');
// UNC path format: \\server\share\resource
// Handle extended UNC path and standard UNC path
// "\\?\UNC\" path prefix should be ignored.
// Ref: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
const isExtendedUNC = filepath.startsWith('\\\\?\\UNC\\');
const prefixLength = isExtendedUNC ? 8 : 2;
const hostnameEndIndex = filepath.indexOf('\\', prefixLength);
if (hostnameEndIndex === -1) {
throw new ERR_INVALID_ARG_VALUE(
'path',
filepath,
'Missing UNC resource path'
);
}
if (hostnameEndIndex === 2) {
throw new ERR_INVALID_ARG_VALUE('path', filepath, 'Empty UNC servername');
}
const hostname = filepath.slice(prefixLength, hostnameEndIndex);
outURL.hostname = urlUtil.domainToASCII(hostname);
outURL.pathname = encodePathChars(
filepath.slice(hostnameEndIndex).replace(backslashRegEx, '/'),
{ windows }
);
return outURL;
}
let resolved = windows
? pathWin32.resolve(filepath)
: pathPosix.resolve(filepath);

// Call encodePathChars first to avoid encoding % again for ? and #.
resolved = encodePathChars(resolved, { windows });

// Question and hash character should be included in pathname.
// Therefore, encoding is required to eliminate parsing them in different states.
// This is done as an optimization to not creating a URL instance and
// later triggering pathname setter, which impacts performance
if (resolved.indexOf('?') !== -1)
resolved = resolved.replace(questionRegex, '%3F');
if (resolved.indexOf('#') !== -1)
resolved = resolved.replace(hashRegex, '%23');
return new URL(`file://${resolved}`);
}

export function toPathIfFileURL(fileURLOrPath: URL | string): string {
if (!isURL(fileURLOrPath)) return fileURLOrPath;
return fileURLToPath(fileURLOrPath);
}
Loading

0 comments on commit 1b00b96

Please sign in to comment.