From e5385e5a99bba50c794b554455b2dd7b957904c4 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 6 Feb 2023 08:30:55 -0500 Subject: [PATCH] url: replace url-parser with ada PR-URL: https://github.com/nodejs/node/pull/46410 Reviewed-By: James M Snell Reviewed-By: Benjamin Gruenbaum Reviewed-By: Tiancheng "Timothy" Gu Reviewed-By: Matteo Collina Reviewed-By: Rafael Gonzaga Reviewed-By: Robert Nagy --- lib/internal/url.js | 499 ++--- node.gyp | 2 - src/crypto/crypto_common.cc | 7 +- src/inspector_agent.cc | 5 +- src/module_wrap.cc | 3 - src/node_api.cc | 2 +- src/node_metadata.cc | 2 + src/node_metadata.h | 1 + src/node_url.cc | 1937 ++--------------- src/node_url.h | 184 +- src/node_url_tables.cc | 448 ---- test/benchmark/test-benchmark-url.js | 14 +- test/cctest/test_url.cc | 188 -- test/fuzzers/fuzz_url.cc | 11 - test/parallel/test-process-versions.js | 1 + .../test-whatwg-url-custom-inspect.js | 20 +- test/parallel/test-whatwg-url-properties.js | 13 +- 17 files changed, 376 insertions(+), 2961 deletions(-) delete mode 100644 src/node_url_tables.cc delete mode 100644 test/cctest/test_url.cc delete mode 100644 test/fuzzers/fuzz_url.cc diff --git a/lib/internal/url.js b/lib/internal/url.js index e23afe3f93e632a..dbec51826e18d6a 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -18,11 +18,9 @@ const { ObjectGetPrototypeOf, ObjectKeys, ObjectPrototypeHasOwnProperty, - ReflectApply, ReflectGetOwnPropertyDescriptor, ReflectOwnKeys, RegExpPrototypeSymbolReplace, - String, StringPrototypeCharAt, StringPrototypeCharCodeAt, StringPrototypeCodePointAt, @@ -92,24 +90,8 @@ const isWindows = platform === 'win32'; const { domainToASCII: _domainToASCII, domainToUnicode: _domainToUnicode, - encodeAuth, parse, - URL_FLAGS_CANNOT_BE_BASE, - URL_FLAGS_HAS_FRAGMENT, - URL_FLAGS_HAS_HOST, - URL_FLAGS_HAS_PASSWORD, - URL_FLAGS_HAS_PATH, - URL_FLAGS_HAS_QUERY, - URL_FLAGS_HAS_USERNAME, - URL_FLAGS_IS_DEFAULT_SCHEME_PORT, - URL_FLAGS_SPECIAL, - kFragment, - kHost, - 
kHostname, - kPathStart, - kPort, - kQuery, - kSchemeStart + updateUrl, } = internalBinding('url'); const { @@ -120,13 +102,21 @@ const { const FORWARD_SLASH = /\//g; const context = Symbol('context'); -const cannotBeBase = Symbol('cannot-be-base'); -const cannotHaveUsernamePasswordPort = - Symbol('cannot-have-username-password-port'); -const special = Symbol('special'); const searchParams = Symbol('query'); const kFormat = Symbol('format'); +const updateActions = { + kProtocol: 0, + kHost: 1, + kHostname: 2, + kPort: 3, + kUsername: 4, + kPassword: 5, + kPathname: 6, + kSearch: 7, + kHash: 8, + kHref: 9, +}; let blob; let cryptoRandom; @@ -145,14 +135,6 @@ function lazyCryptoRandom() { return cryptoRandom; } -// Refs: https://html.spec.whatwg.org/multipage/browsers.html#concept-origin-opaque -const kOpaqueOrigin = 'null'; - -// Refs: https://html.spec.whatwg.org/multipage/browsers.html#ascii-serialisation-of-an-origin -function serializeTupleOrigin(scheme, host, port) { - return `${scheme}//${host}${port === null ? '' : `:${port}`}`; -} - // This class provides the internal state of a URL object. An instance of this // class is stored in every URL object and is accessed internally by setters // and getters. It roughly corresponds to the concept of a URL record in the @@ -160,17 +142,19 @@ function serializeTupleOrigin(scheme, host, port) { // the C++ binding. 
// Refs: https://url.spec.whatwg.org/#concept-url class URLContext { - constructor() { - this.flags = 0; - this.scheme = ':'; - this.username = ''; - this.password = ''; - this.host = null; - this.port = null; - this.path = []; - this.query = null; - this.fragment = null; - } + href = ''; + origin = ''; + protocol = ''; + host = ''; + hostname = ''; + pathname = ''; + search = ''; + username = ''; + password = ''; + port = ''; + hash = ''; + hasHost = false; + hasOpaquePath = false; } function isURLSearchParams(self) { @@ -247,7 +231,6 @@ class URLSearchParams { } else { // USVString init = toUSVString(init); - if (init[0] === '?') init = init.slice(1); initSearchParams(this, init); } @@ -550,129 +533,68 @@ ObjectDefineProperties(URLSearchParams.prototype, { }, }); -function onParseComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - ctx.flags = flags; - ctx.scheme = protocol; - ctx.username = (flags & URL_FLAGS_HAS_USERNAME) !== 0 ? username : ''; - ctx.password = (flags & URL_FLAGS_HAS_PASSWORD) !== 0 ? password : ''; - ctx.port = port; - ctx.path = (flags & URL_FLAGS_HAS_PATH) !== 0 ? 
path : []; - ctx.query = query; - ctx.fragment = fragment; - ctx.host = host; - if (!this[searchParams]) { // Invoked from URL constructor - this[searchParams] = new URLSearchParams(); - this[searchParams][context] = this; - } - initSearchParams(this[searchParams], query); -} - function isURLThis(self) { return self != null && ObjectPrototypeHasOwnProperty(self, context); } -function onParseError(input, flags) { - throw new ERR_INVALID_URL(input); -} +function constructHref(ctx, options) { + if (options) + validateObject(options, 'options'); -function onParseProtocolComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - if ((flags & URL_FLAGS_SPECIAL) !== 0) { - ctx.flags |= URL_FLAGS_SPECIAL; - } else { - ctx.flags &= ~URL_FLAGS_SPECIAL; - } - ctx.scheme = protocol; - ctx.port = port; -} - -function onParseHostnameComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - if ((flags & URL_FLAGS_HAS_HOST) !== 0) { - ctx.host = host; - ctx.flags |= URL_FLAGS_HAS_HOST; - } else { - ctx.host = null; - ctx.flags &= ~URL_FLAGS_HAS_HOST; - } -} - -function onParsePortComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - this[context].port = port; -} - -function onParseHostComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - ReflectApply(onParseHostnameComplete, this, arguments); - if (port !== null || ((flags & URL_FLAGS_IS_DEFAULT_SCHEME_PORT) !== 0)) - ReflectApply(onParsePortComplete, this, arguments); -} - -function onParsePathComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - const ctx = this[context]; - if ((flags & URL_FLAGS_HAS_PATH) !== 0) { - ctx.path = path; - ctx.flags |= URL_FLAGS_HAS_PATH; - } else { - ctx.path = []; - ctx.flags &= ~URL_FLAGS_HAS_PATH; - } - - // The C++ binding may set host to empty string. 
- if ((flags & URL_FLAGS_HAS_HOST) !== 0) { - ctx.host = host; - ctx.flags |= URL_FLAGS_HAS_HOST; - } -} - -function onParseSearchComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - this[context].query = query; -} - -function onParseHashComplete(flags, protocol, username, password, - host, port, path, query, fragment) { - this[context].fragment = fragment; -} + options = { + fragment: true, + unicode: false, + search: true, + auth: true, + ...options + }; -function isURLThis(self) { - return (self !== undefined && self !== null && self[context] !== undefined); + // https://url.spec.whatwg.org/#url-serializing + let ret = ctx.protocol; + if (ctx.hasHost) { + ret += '//'; + const hasUsername = ctx.username !== ''; + const hasPassword = ctx.password !== ''; + if (options.auth && (hasUsername || hasPassword)) { + if (hasUsername) + ret += ctx.username; + if (hasPassword) + ret += `:${ctx.password}`; + ret += '@'; + } + ret += options.unicode ? + domainToUnicode(ctx.hostname) : ctx.hostname; + if (ctx.port !== '') + ret += `:${ctx.port}`; + } else if (!ctx.hasOpaquePath && ctx.pathname.lastIndexOf('/') !== 0 && ctx.pathname.startsWith('//')) { + ret += '/.'; + } + ret += ctx.pathname; + if (options.search) + ret += ctx.search; + if (options.fragment) + ret += ctx.hash; + return ret; } class URL { constructor(input, base = undefined) { // toUSVString is not needed. 
input = `${input}`; - let base_context; - if (base !== undefined) { - base_context = new URL(base)[context]; - } this[context] = new URLContext(); - parse(input, -1, base_context, undefined, - FunctionPrototypeBind(onParseComplete, this), - FunctionPrototypeBind(onParseError, this, input)); - } + this.#onParseComplete = FunctionPrototypeBind(this.#onParseComplete, this); - get [special]() { - return (this[context].flags & URL_FLAGS_SPECIAL) !== 0; - } + if (base !== undefined) { + base = `${base}`; + } - get [cannotBeBase]() { - return (this[context].flags & URL_FLAGS_CANNOT_BE_BASE) !== 0; - } + const isValid = parse(input, + base, + this.#onParseComplete); - // https://url.spec.whatwg.org/#cannot-have-a-username-password-port - get [cannotHaveUsernamePasswordPort]() { - const { host, scheme } = this[context]; - return ((host == null || host === '') || - this[cannotBeBase] || - scheme === 'file:'); + if (!isValid) { + throw new ERR_INVALID_URL(input); + } } [inspect.custom](depth, opts) { @@ -701,8 +623,6 @@ class URL { obj.hash = this.hash; if (opts.showHidden) { - obj.cannotBeBase = this[cannotBeBase]; - obj.special = this[special]; obj[context] = this[context]; } @@ -710,117 +630,71 @@ class URL { } [kFormat](options) { - if (options) - validateObject(options, 'options'); - - options = { - fragment: true, - unicode: false, - search: true, - auth: true, - ...options - }; + // TODO(@anonrig): Replace kFormat with actually calling setters. 
+ return constructHref(this[context], options); + } + + #onParseComplete = (href, origin, protocol, host, hostname, pathname, + search, username, password, port, hash, hasHost, + hasOpaquePath) => { const ctx = this[context]; - // https://url.spec.whatwg.org/#url-serializing - let ret = ctx.scheme; - if (ctx.host !== null) { - ret += '//'; - const has_username = ctx.username !== ''; - const has_password = ctx.password !== ''; - if (options.auth && (has_username || has_password)) { - if (has_username) - ret += ctx.username; - if (has_password) - ret += `:${ctx.password}`; - ret += '@'; - } - ret += options.unicode ? - domainToUnicode(ctx.host) : ctx.host; - if (ctx.port !== null) - ret += `:${ctx.port}`; - } - if (this[cannotBeBase]) { - ret += ctx.path[0]; - } else { - if (ctx.host === null && ctx.path.length > 1 && ctx.path[0] === '') { - ret += '/.'; - } - if (ctx.path.length) { - ret += '/' + ArrayPrototypeJoin(ctx.path, '/'); - } + ctx.href = href; + ctx.origin = origin; + ctx.protocol = protocol; + ctx.host = host; + ctx.hostname = hostname; + ctx.pathname = pathname; + ctx.search = search; + ctx.username = username; + ctx.password = password; + ctx.port = port; + ctx.hash = hash; + // TODO(@anonrig): Remove hasHost and hasOpaquePath when kFormat is removed. 
+ ctx.hasHost = hasHost; + ctx.hasOpaquePath = hasOpaquePath; + if (!this[searchParams]) { // Invoked from URL constructor + this[searchParams] = new URLSearchParams(); + this[searchParams][context] = this; } - if (options.search && ctx.query !== null) - ret += `?${ctx.query}`; - if (options.fragment && ctx.fragment !== null) - ret += `#${ctx.fragment}`; - return ret; - } + initSearchParams(this[searchParams], ctx.search); + }; - // https://heycam.github.io/webidl/#es-stringifier toString() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[kFormat](); + return this[context].href; } get href() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[kFormat](); + return this[context].href; } - set href(input) { + set href(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - input = `${input}`; - parse(input, -1, undefined, undefined, - FunctionPrototypeBind(onParseComplete, this), - FunctionPrototypeBind(onParseError, this, input)); + const valid = updateUrl(this[context].href, updateActions.kHref, `${value}`, this.#onParseComplete); + if (!valid) { throw new ERR_INVALID_URL(`${value}`); } } // readonly get origin() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // Refs: https://url.spec.whatwg.org/#concept-url-origin - const ctx = this[context]; - switch (ctx.scheme) { - case 'blob:': - if (ctx.path.length > 0) { - try { - return (new URL(ctx.path[0])).origin; - } catch { - // Fall through... 
do nothing - } - } - return kOpaqueOrigin; - case 'ftp:': - case 'http:': - case 'https:': - case 'ws:': - case 'wss:': - return serializeTupleOrigin(ctx.scheme, ctx.host, ctx.port); - } - return kOpaqueOrigin; + return this[context].origin; } get protocol() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].scheme; + return this[context].protocol; } - set protocol(scheme) { + set protocol(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - scheme = `${scheme}`; - if (scheme.length === 0) - return; - const ctx = this[context]; - parse(scheme, kSchemeStart, null, ctx, - FunctionPrototypeBind(onParseProtocolComplete, this)); + updateUrl(this[context].href, updateActions.kProtocol, `${value}`, this.#onParseComplete); } get username() { @@ -829,21 +703,10 @@ class URL { return this[context].username; } - set username(username) { + set username(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - username = `${username}`; - if (this[cannotHaveUsernamePasswordPort]) - return; - const ctx = this[context]; - if (username === '') { - ctx.username = ''; - ctx.flags &= ~URL_FLAGS_HAS_USERNAME; - return; - } - ctx.username = encodeAuth(username); - ctx.flags |= URL_FLAGS_HAS_USERNAME; + updateUrl(this[context].href, updateActions.kUsername, `${value}`, this.#onParseComplete); } get password() { @@ -852,138 +715,72 @@ class URL { return this[context].password; } - set password(password) { + set password(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. 
- password = `${password}`; - if (this[cannotHaveUsernamePasswordPort]) - return; - const ctx = this[context]; - if (password === '') { - ctx.password = ''; - ctx.flags &= ~URL_FLAGS_HAS_PASSWORD; - return; - } - ctx.password = encodeAuth(password); - ctx.flags |= URL_FLAGS_HAS_PASSWORD; + updateUrl(this[context].href, updateActions.kPassword, `${value}`, this.#onParseComplete); } get host() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - let ret = ctx.host || ''; - if (ctx.port !== null) - ret += `:${ctx.port}`; - return ret; + return this[context].host; } - set host(host) { + set host(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - // toUSVString is not needed. - host = `${host}`; - if (this[cannotBeBase]) { - // Cannot set the host if cannot-be-base is set - return; - } - parse(host, kHost, null, ctx, - FunctionPrototypeBind(onParseHostComplete, this)); + updateUrl(this[context].href, updateActions.kHost, `${value}`, this.#onParseComplete); } get hostname() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[context].host || ''; + return this[context].hostname; } - set hostname(host) { + set hostname(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - // toUSVString is not needed. - host = `${host}`; - if (this[cannotBeBase]) { - // Cannot set the host if cannot-be-base is set - return; - } - parse(host, kHostname, null, ctx, onParseHostnameComplete.bind(this)); + updateUrl(this[context].href, updateActions.kHostname, `${value}`, this.#onParseComplete); } get port() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const port = this[context].port; - return port === null ? '' : String(port); + return this[context].port; } - set port(port) { + set port(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. 
- port = `${port}`; - if (this[cannotHaveUsernamePasswordPort]) - return; - const ctx = this[context]; - if (port === '') { - ctx.port = null; - return; - } - parse(port, kPort, null, ctx, - FunctionPrototypeBind(onParsePortComplete, this)); + updateUrl(this[context].href, updateActions.kPort, `${value}`, this.#onParseComplete); } get pathname() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - if (this[cannotBeBase]) - return ctx.path[0]; - if (ctx.path.length === 0) - return ''; - return `/${ArrayPrototypeJoin(ctx.path, '/')}`; + return this[context].pathname; } - set pathname(path) { + set pathname(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - // toUSVString is not needed. - path = `${path}`; - if (this[cannotBeBase]) - return; - parse(path, kPathStart, null, this[context], - onParsePathComplete.bind(this)); + updateUrl(this[context].href, updateActions.kPathname, `${value}`, this.#onParseComplete); } get search() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const { query } = this[context]; - if (query === null || query === '') - return ''; - return `?${query}`; + return this[context].search; } set search(search) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; search = toUSVString(search); - if (search === '') { - ctx.query = null; - ctx.flags &= ~URL_FLAGS_HAS_QUERY; - } else { - if (search[0] === '?') search = StringPrototypeSlice(search, 1); - ctx.query = ''; - ctx.flags |= URL_FLAGS_HAS_QUERY; - if (search) { - parse(search, kQuery, null, ctx, - FunctionPrototypeBind(onParseSearchComplete, this)); - } - } - initSearchParams(this[searchParams], search); + updateUrl(this[context].href, updateActions.kSearch, search, this.#onParseComplete); + initSearchParams(this[searchParams], this[context].search); } // readonly @@ -996,34 +793,19 @@ class URL { get hash() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const { fragment } = 
this[context]; - if (fragment === null || fragment === '') - return ''; - return `#${fragment}`; + return this[context].hash; } - set hash(hash) { + set hash(value) { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - const ctx = this[context]; - // toUSVString is not needed. - hash = `${hash}`; - if (!hash) { - ctx.fragment = null; - ctx.flags &= ~URL_FLAGS_HAS_FRAGMENT; - return; - } - if (hash[0] === '#') hash = StringPrototypeSlice(hash, 1); - ctx.fragment = ''; - ctx.flags |= URL_FLAGS_HAS_FRAGMENT; - parse(hash, kFragment, null, ctx, - FunctionPrototypeBind(onParseHashComplete, this)); + updateUrl(this[context].href, updateActions.kHash, `${value}`, this.#onParseComplete); } toJSON() { if (!isURLThis(this)) throw new ERR_INVALID_THIS('URL'); - return this[kFormat](); + return this[context].href; } static createObjectURL(obj) { @@ -1031,9 +813,6 @@ class URL { if (cryptoRandom === undefined) throw new ERR_NO_CRYPTO(); - // Yes, lazy loading is annoying but because of circular - // references between the url, internal/blob, and buffer - // modules, lazy loading here makes sure that things work. const blob = lazyBlob(); if (!blob.isBlob(obj)) throw new ERR_INVALID_ARG_TYPE('obj', 'Blob', obj); @@ -1048,6 +827,7 @@ class URL { static revokeObjectURL(url) { url = `${url}`; try { + // TODO(@anonrig): Remove this try/catch by calling `parse` directly. const parsed = new URL(url); const split = StringPrototypeSplit(parsed.pathname, ':'); if (split.length === 2) @@ -1088,13 +868,12 @@ function update(url, params) { const ctx = url[context]; const serializedParams = params.toString(); - if (serializedParams) { - ctx.query = serializedParams; - ctx.flags |= URL_FLAGS_HAS_QUERY; + if (serializedParams.length > 0) { + ctx.search = '?' 
+ serializedParams; } else { - ctx.query = null; - ctx.flags &= ~URL_FLAGS_HAS_QUERY; + ctx.search = ''; } + ctx.href = constructHref(ctx); } function initSearchParams(url, init) { @@ -1109,14 +888,14 @@ function initSearchParams(url, init) { // Ref: https://url.spec.whatwg.org/#concept-urlencoded-parser function parseParams(qs) { const out = []; - let pairStart = 0; - let lastPos = 0; let seenSep = false; let buf = ''; let encoded = false; let encodeCheck = 0; - let i; - for (i = 0; i < qs.length; ++i) { + let i = qs[0] === '?' ? 1 : 0; + let pairStart = i; + let lastPos = i; + for (; i < qs.length; ++i) { const code = StringPrototypeCharCodeAt(qs, i); // Try matching key/value pair separator diff --git a/node.gyp b/node.gyp index 2cc1e81c5b1cfb0..8239f504c27cd8f 100644 --- a/node.gyp +++ b/node.gyp @@ -542,7 +542,6 @@ 'src/node_trace_events.cc', 'src/node_types.cc', 'src/node_url.cc', - 'src/node_url_tables.cc', 'src/node_util.cc', 'src/node_v8.cc', 'src/node_wasi.cc', @@ -1256,7 +1255,6 @@ 'test/cctest/test_sockaddr.cc', 'test/cctest/test_traced_value.cc', 'test/cctest/test_util.cc', - 'test/cctest/test_url.cc', ], 'conditions': [ diff --git a/src/crypto/crypto_common.cc b/src/crypto/crypto_common.cc index 59acdd82096fc88..41e607e9298314b 100644 --- a/src/crypto/crypto_common.cc +++ b/src/crypto/crypto_common.cc @@ -1,13 +1,12 @@ +#include "crypto/crypto_common.h" #include "base_object-inl.h" #include "env-inl.h" +#include "memory_tracker-inl.h" +#include "node.h" #include "node_buffer.h" #include "node_crypto.h" -#include "crypto/crypto_common.h" -#include "node.h" #include "node_internals.h" -#include "node_url.h" #include "string_bytes.h" -#include "memory_tracker-inl.h" #include "v8.h" #include diff --git a/src/inspector_agent.cc b/src/inspector_agent.cc index 90ab0c2c321d372..214f992c9d01559 100644 --- a/src/inspector_agent.cc +++ b/src/inspector_agent.cc @@ -647,8 +647,9 @@ class NodeInspectorClient : public V8InspectorClient { 
protocol::StringUtil::StringViewToUtf8(resource_name_view); if (!IsFilePath(resource_name)) return nullptr; - node::url::URL url = node::url::URL::FromFilePath(resource_name); - return Utf8ToStringView(url.href()); + + std::string url = node::url::FromFilePath(resource_name); + return Utf8ToStringView(url); } node::Environment* env_; diff --git a/src/module_wrap.cc b/src/module_wrap.cc index 73ce4aa42035a15..0645b3ddf506df2 100644 --- a/src/module_wrap.cc +++ b/src/module_wrap.cc @@ -6,7 +6,6 @@ #include "node_errors.h" #include "node_internals.h" #include "node_process-inl.h" -#include "node_url.h" #include "node_watchdog.h" #include "util-inl.h" @@ -20,8 +19,6 @@ namespace loader { using errors::TryCatchScope; using node::contextify::ContextifyContext; -using node::url::URL; -using node::url::URL_FLAGS_FAILED; using v8::Array; using v8::ArrayBufferView; using v8::Context; diff --git a/src/node_api.cc b/src/node_api.cc index 49234a23dce8008..1180188553d9dd0 100644 --- a/src/node_api.cc +++ b/src/node_api.cc @@ -657,7 +657,7 @@ void napi_module_register_by_symbol(v8::Local exports, // a file system path. // TODO(gabrielschulhof): Pass the `filename` through unchanged if/when we // receive it as a URL already. - module_filename = node::url::URL::FromFilePath(filename.ToString()).href(); + module_filename = node::url::FromFilePath(filename.ToString()); } // Create a new napi_env for this specific module. 
diff --git a/src/node_metadata.cc b/src/node_metadata.cc index ed28871c385532b..6fe09f843e26b7f 100644 --- a/src/node_metadata.cc +++ b/src/node_metadata.cc @@ -1,5 +1,6 @@ #include "node_metadata.h" #include "acorn_version.h" +#include "ada.h" #include "ares.h" #include "brotli/encode.h" #include "llhttp.h" @@ -115,6 +116,7 @@ Metadata::Versions::Versions() { #endif simdutf = SIMDUTF_VERSION; + ada = ADA_VERSION; } Metadata::Release::Release() : name(NODE_RELEASE) { diff --git a/src/node_metadata.h b/src/node_metadata.h index 2a924141d6edbaa..1831bfd0baaac70 100644 --- a/src/node_metadata.h +++ b/src/node_metadata.h @@ -47,6 +47,7 @@ namespace node { V(uvwasi) \ V(acorn) \ V(simdutf) \ + V(ada) \ NODE_VERSIONS_KEY_UNDICI(V) #if HAVE_OPENSSL diff --git a/src/node_url.cc b/src/node_url.cc index 5d710c345d7357e..34e8d96ba3b9317 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -1,37 +1,24 @@ #include "node_url.h" +#include "ada.h" #include "base_object-inl.h" #include "node_errors.h" #include "node_external_reference.h" #include "node_i18n.h" #include "util-inl.h" -#include #include #include -#include -#include namespace node { -using url::table_data::hex; -using url::table_data::C0_CONTROL_ENCODE_SET; -using url::table_data::FRAGMENT_ENCODE_SET; -using url::table_data::PATH_ENCODE_SET; -using url::table_data::USERINFO_ENCODE_SET; -using url::table_data::QUERY_ENCODE_SET_NONSPECIAL; -using url::table_data::QUERY_ENCODE_SET_SPECIAL; - -using v8::Array; +using v8::Boolean; using v8::Context; using v8::Function; using v8::FunctionCallbackInfo; using v8::HandleScope; -using v8::Int32; -using v8::Integer; using v8::Isolate; using v8::Local; using v8::NewStringType; -using v8::Null; using v8::Object; using v8::String; using v8::Undefined; @@ -47,1778 +34,237 @@ Local Utf8String(Isolate* isolate, const std::string& str) { namespace url { namespace { -// https://url.spec.whatwg.org/#eof-code-point -constexpr char kEOL = -1; - -// https://url.spec.whatwg.org/#concept-host 
-class URLHost { - public: - ~URLHost(); - - void ParseIPv4Host(const char* input, size_t length); - void ParseIPv6Host(const char* input, size_t length); - void ParseOpaqueHost(const char* input, size_t length); - void ParseHost(const char* input, - size_t length, - bool is_special, - bool unicode = false); - - bool ParsingFailed() const { return type_ == HostType::H_FAILED; } - std::string ToString() const; - // Like ToString(), but avoids a copy in exchange for invalidating `*this`. - std::string ToStringMove(); - - private: - enum class HostType { - H_FAILED, - H_DOMAIN, - H_IPV4, - H_IPV6, - H_OPAQUE, - }; - - union Value { - std::string domain_or_opaque; - uint32_t ipv4; - uint16_t ipv6[8]; - - ~Value() {} - Value() : ipv4(0) {} - }; - - Value value_; - HostType type_ = HostType::H_FAILED; - - void Reset() { - using string = std::string; - switch (type_) { - case HostType::H_DOMAIN: - case HostType::H_OPAQUE: - value_.domain_or_opaque.~string(); - break; - default: - break; - } - type_ = HostType::H_FAILED; - } - - // Setting the string members of the union with = is brittle because - // it relies on them being initialized to a state that requires no - // destruction of old data. - // For a long time, that worked well enough because ParseIPv6Host() happens - // to zero-fill `value_`, but that really is relying on standard library - // internals too much. - // These helpers are the easiest solution but we might want to consider - // just not forcing strings into an union. 
- void SetOpaque(std::string&& string) { - Reset(); - type_ = HostType::H_OPAQUE; - new(&value_.domain_or_opaque) std::string(std::move(string)); - } - - void SetDomain(std::string&& string) { - Reset(); - type_ = HostType::H_DOMAIN; - new(&value_.domain_or_opaque) std::string(std::move(string)); - } -}; - -URLHost::~URLHost() { - Reset(); -} - -#define ARGS(XX) \ - XX(ARG_FLAGS) \ - XX(ARG_PROTOCOL) \ - XX(ARG_USERNAME) \ - XX(ARG_PASSWORD) \ - XX(ARG_HOST) \ - XX(ARG_PORT) \ - XX(ARG_PATH) \ - XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) \ - XX(ARG_COUNT) // This one has to be last. - -enum url_cb_args { -#define XX(name) name, - ARGS(XX) -#undef XX +enum url_update_action { + kProtocol = 0, + kHost = 1, + kHostname = 2, + kPort = 3, + kUsername = 4, + kPassword = 5, + kPathname = 6, + kSearch = 7, + kHash = 8, + kHref = 9, }; -#define TWO_CHAR_STRING_TEST(bits, name, expr) \ - template \ - bool name(const T ch1, const T ch2) { \ - static_assert(sizeof(ch1) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return (expr); \ - } \ - template \ - bool name(const std::basic_string& str) { \ - static_assert(sizeof(str[0]) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return str.length() >= 2 && name(str[0], str[1]); \ - } - -// https://infra.spec.whatwg.org/#ascii-tab-or-newline -CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) - -// https://infra.spec.whatwg.org/#c0-control -CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f')) - -// https://infra.spec.whatwg.org/#c0-control-or-space -CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) - -// https://infra.spec.whatwg.org/#ascii-digit -CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) - -CHAR_TEST(8, IsASCIIOcDigit, (ch >= '0' && ch <= '7')) - -// https://infra.spec.whatwg.org/#ascii-hex-digit -CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || - (ch >= 'A' && ch <= 'F') || - (ch >= 'a' && ch <= 'f'))) - -// 
https://infra.spec.whatwg.org/#ascii-alpha -CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || - (ch >= 'a' && ch <= 'z'))) - -// https://infra.spec.whatwg.org/#ascii-alphanumeric -CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) - -// https://infra.spec.whatwg.org/#ascii-lowercase -template -T ASCIILowercase(T ch) { - return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; -} - -// https://url.spec.whatwg.org/#forbidden-host-code-point -CHAR_TEST(8, - IsForbiddenHostCodePoint, - ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' || - ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' || - ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' || - ch == '^' || ch == '|') - -// https://url.spec.whatwg.org/#forbidden-domain-code-point -CHAR_TEST(8, - IsForbiddenDomainCodePoint, - IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' || - ch == '\x7f') - -// https://url.spec.whatwg.org/#windows-drive-letter -TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, - (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) - -// https://url.spec.whatwg.org/#normalized-windows-drive-letter -TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, - (IsASCIIAlpha(ch1) && ch2 == ':')) - -#undef TWO_CHAR_STRING_TEST - -bool BitAt(const uint8_t a[], const uint8_t i) { - return !!(a[i >> 3] & (1 << (i & 7))); +void SetArgs(Environment* env, Local argv[12], const ada::result& url) { + Isolate* isolate = env->isolate(); + argv[0] = Utf8String(isolate, url->get_href()); + argv[1] = Utf8String(isolate, url->get_origin()); + argv[2] = Utf8String(isolate, url->get_protocol()); + argv[3] = Utf8String(isolate, url->get_host()); + argv[4] = Utf8String(isolate, url->get_hostname()); + argv[5] = Utf8String(isolate, url->get_pathname()); + argv[6] = Utf8String(isolate, url->get_search()); + argv[7] = Utf8String(isolate, url->get_username()); + argv[8] = Utf8String(isolate, url->get_password()); + argv[9] = Utf8String(isolate, url->get_port()); + 
argv[10] = Utf8String(isolate, url->get_hash()); + argv[11] = Boolean::New(isolate, url->host.has_value()); + argv[12] = Boolean::New(isolate, url->has_opaque_path); } -// Appends ch to str. If ch position in encode_set is set, the ch will -// be percent-encoded then appended. -void AppendOrEscape(std::string* str, - const unsigned char ch, - const uint8_t encode_set[]) { - if (BitAt(encode_set, ch)) - *str += hex + ch * 4; // "%XX\0" has a length of 4 - else - *str += ch; -} +void Parse(const FunctionCallbackInfo& args) { + CHECK_GE(args.Length(), 3); + CHECK(args[0]->IsString()); // input + // args[1] // base url + CHECK(args[2]->IsFunction()); // complete callback -unsigned hex2bin(const char ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'A' && ch <= 'F') - return 10 + (ch - 'A'); - if (ch >= 'a' && ch <= 'f') - return 10 + (ch - 'a'); - UNREACHABLE(); -} + Local success_callback_ = args[2].As(); -std::string PercentDecode(const char* input, size_t len) { - std::string dest; - if (len == 0) - return dest; - dest.reserve(len); - const char* pointer = input; - const char* end = input + len; + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(env->isolate()); + Context::Scope context_scope(env->context()); - while (pointer < end) { - const char ch = pointer[0]; - size_t remaining = end - pointer - 1; - if (ch != '%' || remaining < 2 || - (ch == '%' && - (!IsASCIIHexDigit(pointer[1]) || - !IsASCIIHexDigit(pointer[2])))) { - dest += ch; - pointer++; - continue; - } else { - unsigned a = hex2bin(pointer[1]); - unsigned b = hex2bin(pointer[2]); - char c = static_cast(a * 16 + b); - dest += c; - pointer += 3; + Utf8Value input(env->isolate(), args[0]); + ada::result base; + ada::url* base_pointer = nullptr; + if (args[1]->IsString()) { + base = ada::parse(Utf8Value(env->isolate(), args[1]).ToString()); + if (!base) { + return args.GetReturnValue().Set(false); } + base_pointer = 
&base.value(); } - return dest; -} - -#define SPECIALS(XX) \ - XX(ftp, 21, "ftp:") \ - XX(file, -1, "file:") \ - XX(http, 80, "http:") \ - XX(https, 443, "https:") \ - XX(ws, 80, "ws:") \ - XX(wss, 443, "wss:") - -bool IsSpecial(const std::string& scheme) { -#define V(_, __, name) if (scheme == name) return true; - SPECIALS(V); -#undef V - return false; -} - -Local GetSpecial(Environment* env, const std::string& scheme) { -#define V(key, _, name) if (scheme == name) \ - return env->url_special_##key##_string(); - SPECIALS(V) -#undef V - UNREACHABLE(); -} - -int NormalizePort(const std::string& scheme, int p) { -#define V(_, port, name) if (scheme == name && p == port) return -1; - SPECIALS(V); -#undef V - return p; -} - -// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter -bool StartsWithWindowsDriveLetter(const char* p, const char* end) { - size_t length = end - p; - return length >= 2 && - IsWindowsDriveLetter(p[0], p[1]) && - (length == 2 || - p[2] == '/' || - p[2] == '\\' || - p[2] == '?' || - p[2] == '#'); -} - -#if defined(NODE_HAVE_I18N_SUPPORT) -bool ToUnicode(const std::string& input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) - return false; - output->assign(*buf, buf.length()); - return true; -} - -bool ToASCII(const std::string& input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) - return false; - if (buf.length() == 0) - return false; - output->assign(*buf, buf.length()); - return true; -} -#else // !defined(NODE_HAVE_I18N_SUPPORT) -// Intentional non-ops if ICU is not present. 
-bool ToUnicode(const std::string& input, std::string* output) { - *output = input; - return true; -} - -bool ToASCII(const std::string& input, std::string* output) { - *output = input; - return true; -} -#endif // !defined(NODE_HAVE_I18N_SUPPORT) - -#define NS_IN6ADDRSZ 16 - -void URLHost::ParseIPv6Host(const char* input, size_t length) { - CHECK_EQ(type_, HostType::H_FAILED); - - unsigned char buf[sizeof(struct in6_addr)]; - MaybeStackBuffer ipv6(length + 1); - *(*ipv6 + length) = 0; - memset(buf, 0, sizeof(buf)); - memcpy(*ipv6, input, sizeof(const char) * length); - - int ret = uv_inet_pton(AF_INET6, *ipv6, buf); - - if (ret != 0) { - return; - } + ada::result out = + ada::parse(std::string_view(input.out(), input.length()), base_pointer); - // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 - for (int i = 0; i < NS_IN6ADDRSZ; i += 2) { - value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1]; + if (!out) { + return args.GetReturnValue().Set(false); } - type_ = HostType::H_IPV6; -} - -// https://url.spec.whatwg.org/#ipv4-number-parser -int64_t ParseIPv4Number(const char* start, const char* end) { - if (end - start == 0) return -1; - - unsigned R = 10; - if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { - start += 2; - R = 16; - } else if (end - start >= 2 && start[0] == '0') { - start++; - R = 8; - } - - if (end - start == 0) return 0; - - const char* p = start; - - while (p < end) { - const char ch = p[0]; - switch (R) { - case 8: - if (ch < '0' || ch > '7') - return -1; - break; - case 10: - if (!IsASCIIDigit(ch)) - return -1; - break; - case 16: - if (!IsASCIIHexDigit(ch)) - return -1; - break; - } - p++; - } - return strtoll(start, nullptr, R); + const Local undef = Undefined(isolate); + Local argv[] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, out); + 
USE(success_callback_->Call( + env->context(), args.This(), arraysize(argv), argv)); + args.GetReturnValue().Set(true); } -// https://url.spec.whatwg.org/#ipv4-number-parser -bool IsIPv4NumberValid(const std::string_view input) { - if (input.empty()) { - return false; - } - - // If a number starts with '0' it might be a number with base 8 or base - // 16. If not, checking if all characters are digits proves that it is a - // base 10 number. - if (input.size() >= 2 && input[0] == '0') { - if (input[1] == 'X' || input[1] == 'x') { - if (input.size() == 2) { - return true; - } - - return std::all_of(input.begin() + 2, input.end(), [](const char& c) { - return IsASCIIHexDigit(c); - }); - } - - return std::all_of(input.begin() + 1, input.end(), [](const char& c) { - return IsASCIIOcDigit(c); - }); - } - - return std::all_of(input.begin(), input.end(), [](const char& c) { - return IsASCIIDigit(c); - }); -} +void DomainToASCII(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); -// https://url.spec.whatwg.org/#ends-in-a-number-checker -inline bool EndsInANumber(const std::string_view input) { + std::string input = Utf8Value(env->isolate(), args[0]).ToString(); if (input.empty()) { - return false; - } - - char delimiter = '.'; - auto last_index = input.size() - 1; - if (input.back() == delimiter) { - --last_index; - } - - std::string_view last{}; - auto pos = input.find_last_of(delimiter, last_index); - if (pos == std::string_view::npos) { - last = input.substr(0, last_index); - } else { - last = input.substr(pos + 1, last_index - pos); - } - - if (last.empty()) { - return false; + return args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); } - if (std::all_of(last.begin(), last.end(), [](const char& c) { - return IsASCIIDigit(c); - })) { - return true; - } - - return IsIPv4NumberValid(last); +#if defined(NODE_HAVE_I18N_SUPPORT) + // It is important to have an 
initial value that contains a special scheme. + // Since it will change the implementation of `set_hostname` according to URL + // spec. + ada::result out = ada::parse("ws://x"); + DCHECK(out); + if (!out->set_hostname(input)) { + return args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + } + std::string host = out->get_hostname(); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), host.c_str()).ToLocalChecked()); +#else + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), input.c_str()).ToLocalChecked()); +#endif } -void URLHost::ParseIPv4Host(const char* input, size_t length) { - CHECK_EQ(type_, HostType::H_FAILED); - const char* pointer = input; - const char* mark = input; - const char* end = pointer + length; - unsigned int parts = 0; - uint32_t val = 0; - uint64_t numbers[4]; - unsigned int tooBigNumbers = 0; - if (length == 0) - return; - - while (pointer <= end) { - const char ch = pointer < end ? pointer[0] : kEOL; - int64_t remaining = end - pointer - 1; - if (ch == '.' || ch == kEOL) { - if (++parts > arraysize(numbers)) return; - if (pointer == mark) - return; - int64_t n = ParseIPv4Number(mark, pointer); - if (n < 0) - return; - - if (n > 255) { - tooBigNumbers++; - } - numbers[parts - 1] = n; - mark = pointer + 1; - if (ch == '.' && remaining == 0) - break; - } - pointer++; - } - CHECK_GT(parts, 0); - - // If any but the last item in numbers is greater than 255, return failure. - // If the last item in numbers is greater than or equal to - // 256^(5 - the number of items in numbers), return failure. 
- if (tooBigNumbers > 1 || (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || - numbers[parts - 1] >= UINT64_C(1) << (8 * (5 - parts))) { - return; - } +void DomainToUnicode(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); - type_ = HostType::H_IPV4; - val = static_cast(numbers[parts - 1]); - for (unsigned int n = 0; n < parts - 1; n++) { - val += static_cast(numbers[n]) << (8 * (3 - n)); + std::string input = Utf8Value(env->isolate(), args[0]).ToString(); +#if defined(NODE_HAVE_I18N_SUPPORT) + // It is important to have an initial value that contains a special scheme. + // Since it will change the implementation of `set_hostname` according to URL + // spec. + ada::result out = ada::parse("ws://x"); + DCHECK(out); + if (!out->set_hostname(input)) { + return args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), "").ToLocalChecked()); } + std::string host = out->get_hostname(); - value_.ipv4 = val; -} + MaybeStackBuffer buf; + int32_t len = i18n::ToUnicode(&buf, host.data(), host.length()); -void URLHost::ParseOpaqueHost(const char* input, size_t length) { - CHECK_EQ(type_, HostType::H_FAILED); - std::string output; - output.reserve(length); - for (size_t i = 0; i < length; i++) { - const char ch = input[i]; - if (IsForbiddenHostCodePoint(ch)) { - return; - } else { - AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET); - } + if (len < 0) { + return args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), "").ToLocalChecked()); } - SetOpaque(std::move(output)); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), *buf, NewStringType::kNormal, len) + .ToLocalChecked()); +#else // !defined(NODE_HAVE_I18N_SUPPORT) + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), input.c_str()).ToLocalChecked()); +#endif } -void URLHost::ParseHost(const char* input, - size_t length, - bool is_special, - bool unicode) { - CHECK_EQ(type_, 
HostType::H_FAILED); - const char* pointer = input; - - if (length == 0) - return; - - if (pointer[0] == '[') { - if (pointer[length - 1] != ']') - return; - return ParseIPv6Host(++pointer, length - 2); - } - - if (!is_special) - return ParseOpaqueHost(input, length); - - // First, we have to percent decode - std::string decoded = PercentDecode(input, length); +void UpdateUrl(const FunctionCallbackInfo& args) { + CHECK(args[0]->IsString()); // href + CHECK(args[1]->IsNumber()); // action type + CHECK(args[2]->IsString()); // new value + CHECK(args[3]->IsFunction()); // success callback - // Then we have to punycode toASCII - if (!ToASCII(decoded, &decoded)) - return; - - // If any of the following characters are still present, we have to fail - for (size_t n = 0; n < decoded.size(); n++) { - const char ch = decoded[n]; - if (IsForbiddenDomainCodePoint(ch)) { - return; - } - } - - // If domain ends in a number, then return the result of IPv4 parsing domain - if (EndsInANumber(decoded)) { - return ParseIPv4Host(decoded.c_str(), decoded.length()); - } - - // If the unicode flag is set, run the result through punycode ToUnicode - if (unicode && !ToUnicode(decoded, &decoded)) - return; - - // It's not an IPv4 or IPv6 address, it must be a domain - SetDomain(std::move(decoded)); -} - -// Locates the longest sequence of 0 segments in an IPv6 address -// in order to use the :: compression when serializing -template -T* FindLongestZeroSequence(T* values, size_t len) { - T* start = values; - T* end = start + len; - T* result = nullptr; + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); - T* current = nullptr; - unsigned counter = 0, longest = 1; + enum url_update_action action = static_cast( + args[1]->Uint32Value(env->context()).FromJust()); + Utf8Value input(isolate, args[0].As()); + Utf8Value new_value(isolate, args[2].As()); + Local success_callback_ = args[3].As(); - while (start < end) { - if (*start == 0) { - if (current == 
nullptr) - current = start; - counter++; - } else { - if (counter > longest) { - longest = counter; - result = current; - } - counter = 0; - current = nullptr; - } - start++; - } - if (counter > longest) - result = current; - return result; -} + std::string_view new_value_view = + std::string_view(new_value.out(), new_value.length()); + std::string_view input_view = std::string_view(input.out(), input.length()); + ada::result out = ada::parse(input_view); + CHECK(out); -std::string URLHost::ToStringMove() { - std::string return_value; - switch (type_) { - case HostType::H_DOMAIN: - case HostType::H_OPAQUE: - return_value = std::move(value_.domain_or_opaque); - break; - default: - return_value = ToString(); - break; - } - Reset(); - return return_value; -} + bool result{true}; -std::string URLHost::ToString() const { - std::string dest; - switch (type_) { - case HostType::H_DOMAIN: - case HostType::H_OPAQUE: - return value_.domain_or_opaque; - case HostType::H_IPV4: { - dest.reserve(15); - uint32_t value = value_.ipv4; - for (int n = 0; n < 4; n++) { - dest.insert(0, std::to_string(value % 256)); - if (n < 3) - dest.insert(0, 1, '.'); - value /= 256; - } + switch (action) { + case kPathname: { + result = out->set_pathname(new_value_view); break; } - case HostType::H_IPV6: { - dest.reserve(41); - dest += '['; - const uint16_t* start = &value_.ipv6[0]; - const uint16_t* compress_pointer = - FindLongestZeroSequence(start, 8); - bool ignore0 = false; - for (int n = 0; n <= 7; n++) { - const uint16_t* piece = &value_.ipv6[n]; - if (ignore0 && *piece == 0) - continue; - else if (ignore0) - ignore0 = false; - if (compress_pointer == piece) { - dest += n == 0 ? 
"::" : ":"; - ignore0 = true; - continue; - } - char buf[5]; - snprintf(buf, sizeof(buf), "%x", *piece); - dest += buf; - if (n < 7) - dest += ':'; - } - dest += ']'; + case kHash: { + out->set_hash(new_value_view); break; } - case HostType::H_FAILED: + case kHost: { + result = out->set_host(new_value_view); break; - } - return dest; -} - -bool ParseHost(const std::string& input, - std::string* output, - bool is_special, - bool unicode = false) { - if (input.empty()) { - output->clear(); - return true; - } - URLHost host; - host.ParseHost(input.c_str(), input.length(), is_special, unicode); - if (host.ParsingFailed()) - return false; - *output = host.ToStringMove(); - return true; -} - -std::vector FromJSStringArray(Environment* env, - Local array) { - std::vector vec; - if (array->Length() > 0) - vec.reserve(array->Length()); - for (size_t n = 0; n < array->Length(); n++) { - Local val = array->Get(env->context(), n).ToLocalChecked(); - if (val->IsString()) { - Utf8Value value(env->isolate(), val.As()); - vec.emplace_back(*value, value.length()); - } - } - return vec; -} - -url_data HarvestBase(Environment* env, Local base_obj) { - url_data base; - Local context = env->context(); - - Local flags = - base_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); - if (flags->IsInt32()) - base.flags = flags->Int32Value(context).FromJust(); - - Local port = - base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - base.port = port->Int32Value(context).FromJust(); - - Local scheme = - base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); - base.scheme = Utf8Value(env->isolate(), scheme).out(); - - auto GetStr = [&](std::string url_data::*member, - int flag, - Local name, - bool empty_as_present) { - Local value = base_obj->Get(env->context(), name).ToLocalChecked(); - if (value->IsString()) { - Utf8Value utf8value(env->isolate(), value.As()); - (base.*member).assign(*utf8value, utf8value.length()); - if 
(empty_as_present || value.As()->Length() != 0) { - base.flags |= flag; - } - } - }; - GetStr(&url_data::username, - URL_FLAGS_HAS_USERNAME, - env->username_string(), - false); - GetStr(&url_data::password, - URL_FLAGS_HAS_PASSWORD, - env->password_string(), - false); - GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true); - GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true); - GetStr(&url_data::fragment, - URL_FLAGS_HAS_FRAGMENT, - env->fragment_string(), - true); - - Local - path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); - if (path->IsArray()) { - base.flags |= URL_FLAGS_HAS_PATH; - base.path = FromJSStringArray(env, path.As()); - } - return base; -} - -url_data HarvestContext(Environment* env, Local context_obj) { - url_data context; - Local flags = - context_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); - if (flags->IsInt32()) { - static constexpr int32_t kCopyFlagsMask = - URL_FLAGS_SPECIAL | - URL_FLAGS_CANNOT_BE_BASE | - URL_FLAGS_HAS_USERNAME | - URL_FLAGS_HAS_PASSWORD | - URL_FLAGS_HAS_HOST; - context.flags |= flags.As()->Value() & kCopyFlagsMask; - } - Local scheme = - context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); - if (scheme->IsString()) { - Utf8Value value(env->isolate(), scheme); - context.scheme.assign(*value, value.length()); - } - Local port = - context_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - context.port = port.As()->Value(); - if (context.flags & URL_FLAGS_HAS_USERNAME) { - Local username = - context_obj->Get(env->context(), - env->username_string()).ToLocalChecked(); - CHECK(username->IsString()); - Utf8Value value(env->isolate(), username); - context.username.assign(*value, value.length()); - } - if (context.flags & URL_FLAGS_HAS_PASSWORD) { - Local password = - context_obj->Get(env->context(), - env->password_string()).ToLocalChecked(); - CHECK(password->IsString()); - Utf8Value 
value(env->isolate(), password); - context.password.assign(*value, value.length()); - } - Local host = - context_obj->Get(env->context(), - env->host_string()).ToLocalChecked(); - if (host->IsString()) { - Utf8Value value(env->isolate(), host); - context.host.assign(*value, value.length()); - } - return context; -} - -// Single dot segment can be ".", "%2e", or "%2E" -bool IsSingleDotSegment(const std::string& str) { - switch (str.size()) { - case 1: - return str == "."; - case 3: - return str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e'; - default: - return false; - } -} - -// Double dot segment can be: -// "..", ".%2e", ".%2E", "%2e.", "%2E.", -// "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" -bool IsDoubleDotSegment(const std::string& str) { - switch (str.size()) { - case 2: - return str == ".."; - case 4: - if (str[0] != '.' && str[0] != '%') - return false; - return ((str[0] == '.' && - str[1] == '%' && - str[2] == '2' && - ASCIILowercase(str[3]) == 'e') || - (str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && - str[3] == '.')); - case 6: - return (str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && - str[3] == '%' && - str[4] == '2' && - ASCIILowercase(str[5]) == 'e'); - default: - return false; - } -} - -void ShortenUrlPath(struct url_data* url) { - if (url->path.empty()) return; - if (url->path.size() == 1 && url->scheme == "file:" && - IsNormalizedWindowsDriveLetter(url->path[0])) return; - url->path.pop_back(); -} - -} // anonymous namespace - -void URL::Parse(const char* input, - size_t len, - enum url_parse_state state_override, - struct url_data* url, - bool has_url, - const struct url_data* base, - bool has_base) { - const char* p = input; - const char* end = input + len; - - if (!has_url) { - for (const char* ptr = p; ptr < end; ptr++) { - if (IsC0ControlOrSpace(*ptr)) - p++; - else - break; } - for (const char* ptr = end - 1; ptr >= p; ptr--) { - if (IsC0ControlOrSpace(*ptr)) - end--; - else - 
break; + case kHostname: { + result = out->set_hostname(new_value_view); + break; } - input = p; - len = end - p; - } - - // The spec says we should strip out any ASCII tabs or newlines. - // In those cases, we create another std::string instance with the filtered - // contents, but in the general case we avoid the overhead. - std::string whitespace_stripped; - for (const char* ptr = p; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) - continue; - // Hit tab or newline. Allocate storage, copy what we have until now, - // and then iterate and filter all similar characters out. - whitespace_stripped.reserve(len - 1); - whitespace_stripped.assign(p, ptr - p); - // 'ptr + 1' skips the current char, which we know to be tab or newline. - for (ptr = ptr + 1; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) - whitespace_stripped += *ptr; + case kHref: { + result = out->set_href(new_value_view); + break; } - - // Update variables like they should have looked like if the string - // had been stripped of whitespace to begin with. - input = whitespace_stripped.c_str(); - len = whitespace_stripped.size(); - p = input; - end = input + len; - break; - } - - bool atflag = false; // Set when @ has been seen. - bool square_bracket_flag = false; // Set inside of [...] - bool password_token_seen_flag = false; // Set after a : after an username. - - std::string buffer; - - // Set the initial parse state. - const bool has_state_override = state_override != kUnknownState; - enum url_parse_state state = has_state_override ? state_override : - kSchemeStart; - - if (state < kSchemeStart || state > kFragment) { - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; - } - - while (p <= end) { - const char ch = p < end ? 
p[0] : kEOL; - bool special = (url->flags & URL_FLAGS_SPECIAL); - bool cannot_be_base; - bool special_back_slash = (special && ch == '\\'); - - switch (state) { - case kSchemeStart: - if (IsASCIIAlpha(ch)) { - buffer += ASCIILowercase(ch); - state = kScheme; - } else if (!has_state_override) { - state = kNoScheme; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; - return; - } - break; - case kScheme: - if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') { - buffer += ASCIILowercase(ch); - } else if (ch == ':' || (has_state_override && ch == kEOL)) { - if (has_state_override && buffer.size() == 0) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - buffer += ':'; - - bool new_is_special = IsSpecial(buffer); - - if (has_state_override) { - if ((special != new_is_special) || - ((buffer == "file:") && - ((url->flags & URL_FLAGS_HAS_USERNAME) || - (url->flags & URL_FLAGS_HAS_PASSWORD) || - (url->port != -1))) || - (url->scheme == "file:" && url->host.empty())) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - } - - url->scheme = std::move(buffer); - url->port = NormalizePort(url->scheme, url->port); - if (new_is_special) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - special = false; - } - // `special_back_slash` equals to `(special && ch == '\\')` and `ch` - // here always not equals to `\\`. So `special_back_slash` here always - // equals to `false`. 
- special_back_slash = false; - buffer.clear(); - if (has_state_override) - return; - if (url->scheme == "file:") { - state = kFile; - } else if (special && - has_base && - url->scheme == base->scheme) { - state = kSpecialRelativeOrAuthority; - } else if (special) { - state = kSpecialAuthoritySlashes; - } else if (p + 1 < end && p[1] == '/') { - state = kPathOrAuthority; - p++; - } else { - url->flags |= URL_FLAGS_CANNOT_BE_BASE; - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - state = kCannotBeBase; - } - } else if (!has_state_override) { - buffer.clear(); - state = kNoScheme; - p = input; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; - return; - } - break; - case kNoScheme: - cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE); - if (!has_base || (cannot_be_base && ch != '#')) { - url->flags |= URL_FLAGS_FAILED; - return; - } else if (cannot_be_base && ch == '#') { - url->scheme = base->scheme; - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - special = false; - } - special_back_slash = (special && ch == '\\'); - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_FRAGMENT) { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = base->fragment; - } - url->flags |= URL_FLAGS_CANNOT_BE_BASE; - state = kFragment; - } else if (has_base && - base->scheme != "file:") { - state = kRelative; - continue; - } else { - url->scheme = "file:"; - url->flags |= URL_FLAGS_SPECIAL; - special = true; - state = kFile; - special_back_slash = (special && ch == '\\'); - continue; - } - break; - case kSpecialRelativeOrAuthority: - if (ch == '/' && p + 1 < end && p[1] == '/') { - state = kSpecialAuthorityIgnoreSlashes; - p++; - } else { - state = kRelative; 
- continue; - } - break; - case kPathOrAuthority: - if (ch == '/') { - state = kAuthority; - } else { - state = kPath; - continue; - } - break; - case kRelative: - url->scheme = base->scheme; - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - special = false; - } - special_back_slash = (special && ch == '\\'); - switch (ch) { - case kEOL: - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - break; - case '/': - state = kRelativeSlash; - break; - case '?': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kQuery; - break; - case '#': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & 
URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kFragment; - break; - default: - if (special_back_slash) { - state = kRelativeSlash; - } else { - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - ShortenUrlPath(url); - } - url->port = base->port; - state = kPath; - continue; - } - } - break; - case kRelativeSlash: - if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { - state = kSpecialAuthorityIgnoreSlashes; - } else if (ch == '/') { - state = kAuthority; - } else { - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - url->port = base->port; - state = kPath; - continue; - } - break; - case kSpecialAuthoritySlashes: - state = kSpecialAuthorityIgnoreSlashes; - if (ch == '/' && p + 1 < end && p[1] == '/') { - p++; - } else { - continue; - } - break; - case kSpecialAuthorityIgnoreSlashes: - if (ch != '/' && ch != '\\') { - state = kAuthority; - continue; - } - break; - case kAuthority: - if (ch == '@') { - if (atflag) { - buffer.reserve(buffer.size() + 3); - buffer.insert(0, "%40"); - } - atflag = true; - size_t blen = buffer.size(); - if (blen > 0 && buffer[0] != ':') { - 
url->flags |= URL_FLAGS_HAS_USERNAME; - } - for (size_t n = 0; n < blen; n++) { - const char bch = buffer[n]; - if (bch == ':') { - url->flags |= URL_FLAGS_HAS_PASSWORD; - if (!password_token_seen_flag) { - password_token_seen_flag = true; - continue; - } - } - if (password_token_seen_flag) { - AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET); - } else { - AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET); - } - } - buffer.clear(); - } else if (ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - if (atflag && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; - } - p -= buffer.size() + 1; - buffer.clear(); - state = kHost; - } else { - buffer += ch; - } - break; - case kHost: - case kHostname: - if (has_state_override && url->scheme == "file:") { - state = kFileHost; - continue; - } else if (ch == ':' && !square_bracket_flag) { - if (buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; - } - if (state_override == kHostname) { - return; - } - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(buffer, &url->host, special)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - buffer.clear(); - state = kPort; - } else if (ch == kEOL || - ch == '/' || - ch == '?' 
|| - ch == '#' || - special_back_slash) { - p--; - if (special && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; - } - if (has_state_override && - buffer.size() == 0 && - ((url->username.size() > 0 || url->password.size() > 0) || - url->port != -1)) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(buffer, &url->host, special)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - buffer.clear(); - state = kPathStart; - if (has_state_override) { - return; - } - } else { - if (ch == '[') - square_bracket_flag = true; - if (ch == ']') - square_bracket_flag = false; - buffer += ch; - } - break; - case kPort: - if (IsASCIIDigit(ch)) { - buffer += ch; - } else if (has_state_override || - ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - if (buffer.size() > 0) { - unsigned port = 0; - // the condition port <= 0xffff prevents integer overflow - for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++) - port = port * 10 + buffer[i] - '0'; - if (port > 0xffff) { - // TODO(TimothyGu): This hack is currently needed for the host - // setter since it needs access to hostname if it is valid, and - // if the FAILED flag is set the entire response to JS layer - // will be empty. - if (state_override == kHost) - url->port = -1; - else - url->flags |= URL_FLAGS_FAILED; - return; - } - // the port is valid - url->port = NormalizePort(url->scheme, static_cast(port)); - if (url->port == -1) - url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT; - buffer.clear(); - } else if (has_state_override) { - // TODO(TimothyGu): Similar case as above. 
- if (state_override == kHost) - url->port = -1; - else - url->flags |= URL_FLAGS_TERMINATED; - return; - } - state = kPathStart; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; - return; - } - break; - case kFile: - url->scheme = "file:"; - url->host.clear(); - url->flags |= URL_FLAGS_HAS_HOST; - if (ch == '/' || ch == '\\') { - state = kFileSlash; - } else if (has_base && base->scheme == "file:") { - switch (ch) { - case kEOL: - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - break; - case '?': - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - break; - case '#': - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - break; - default: - url->query.clear(); - if (base->flags & URL_FLAGS_HAS_HOST) { - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (!StartsWithWindowsDriveLetter(p, end)) { - ShortenUrlPath(url); - } else { - url->path.clear(); - } - state = kPath; - continue; - } - } else { - state = kPath; - continue; - } - break; - case kFileSlash: - if (ch == '/' || ch == '\\') { - state = kFileHost; - } else { - if (has_base && base->scheme == "file:") { - url->flags |= URL_FLAGS_HAS_HOST; - 
url->host = base->host; - if (!StartsWithWindowsDriveLetter(p, end) && - IsNormalizedWindowsDriveLetter(base->path[0])) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(base->path[0]); - } - } - state = kPath; - continue; - } - break; - case kFileHost: - if (ch == kEOL || - ch == '/' || - ch == '\\' || - ch == '?' || - ch == '#') { - if (!has_state_override && - buffer.size() == 2 && - IsWindowsDriveLetter(buffer)) { - state = kPath; - } else if (buffer.size() == 0) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host.clear(); - if (has_state_override) - return; - state = kPathStart; - } else { - std::string host; - if (!ParseHost(buffer, &host, special)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - if (host == "localhost") - host.clear(); - url->flags |= URL_FLAGS_HAS_HOST; - url->host = host; - if (has_state_override) - return; - buffer.clear(); - state = kPathStart; - } - continue; - } else { - buffer += ch; - } - break; - case kPathStart: - if (IsSpecial(url->scheme)) { - state = kPath; - if (ch != '/' && ch != '\\') { - continue; - } - } else if (!has_state_override && ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - } else if (!has_state_override && ch == '#') { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - } else if (ch != kEOL) { - state = kPath; - if (ch != '/') { - continue; - } - } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - } - break; - case kPath: - if (ch == kEOL || - ch == '/' || - special_back_slash || - (!has_state_override && (ch == '?' 
|| ch == '#'))) { - if (IsDoubleDotSegment(buffer)) { - ShortenUrlPath(url); - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - } - } else if (IsSingleDotSegment(buffer) && - ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(""); - } else if (!IsSingleDotSegment(buffer)) { - if (url->scheme == "file:" && - url->path.empty() && - buffer.size() == 2 && - IsWindowsDriveLetter(buffer)) { - buffer[1] = ':'; - } - url->flags |= URL_FLAGS_HAS_PATH; - url->path.emplace_back(std::move(buffer)); - } - buffer.clear(); - if (ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - } else if (ch == '#') { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - } - } else { - AppendOrEscape(&buffer, ch, PATH_ENCODE_SET); - } - break; - case kCannotBeBase: - switch (ch) { - case '?': - state = kQuery; - break; - case '#': - state = kFragment; - break; - default: - if (url->path.empty()) - url->path.emplace_back(""); - else if (ch != kEOL) - AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET); - } - break; - case kQuery: - if (ch == kEOL || (!has_state_override && ch == '#')) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = std::move(buffer); - buffer.clear(); - if (ch == '#') - state = kFragment; - } else { - AppendOrEscape(&buffer, ch, special ? 
QUERY_ENCODE_SET_SPECIAL : - QUERY_ENCODE_SET_NONSPECIAL); - } - break; - case kFragment: - switch (ch) { - case kEOL: - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = std::move(buffer); - break; - default: - AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET); - } - break; - default: - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; + case kPassword: { + result = out->set_password(new_value_view); + break; } - - p++; - } -} // NOLINT(readability/fn_size) - -// https://url.spec.whatwg.org/#url-serializing -std::string URL::SerializeURL(const url_data& url, - bool exclude = false) { - std::string output; - output.reserve( - 10 + // We generally insert < 10 separator characters between URL parts - url.scheme.size() + - url.username.size() + - url.password.size() + - url.host.size() + - url.query.size() + - url.fragment.size() + - url.href.size() + - std::accumulate( - url.path.begin(), - url.path.end(), - 0, - [](size_t sum, const auto& str) { return sum + str.size(); })); - - output += url.scheme; - if (url.flags & URL_FLAGS_HAS_HOST) { - output += "//"; - if (url.flags & URL_FLAGS_HAS_USERNAME || - url.flags & URL_FLAGS_HAS_PASSWORD) { - if (url.flags & URL_FLAGS_HAS_USERNAME) { - output += url.username; - } - if (url.flags & URL_FLAGS_HAS_PASSWORD) { - output += ":" + url.password; - } - output += "@"; + case kPort: { + result = out->set_port(new_value_view); + break; } - output += url.host; - if (url.port != -1) { - output += ":" + std::to_string(url.port); + case kProtocol: { + result = out->set_protocol(new_value_view); + break; } - } - if (url.flags & URL_FLAGS_CANNOT_BE_BASE) { - output += url.path[0]; - } else { - if (!(url.flags & URL_FLAGS_HAS_HOST) && - url.path.size() > 1 && - url.path[0].empty()) { - output += "/."; + case kSearch: { + out->set_search(new_value_view); + break; } - for (size_t i = 1; i < url.path.size(); i++) { - output += "/" + url.path[i]; + case kUsername: { + result = out->set_username(new_value_view); + break; } } - if 
(url.flags & URL_FLAGS_HAS_QUERY) { - output += "?" + url.query; - } - if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) { - output += "#" + url.fragment; - } - output.shrink_to_fit(); - return output; -} - -namespace { -void SetArgs(Environment* env, - Local argv[ARG_COUNT], - const struct url_data& url) { - Isolate* isolate = env->isolate(); - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ARG_PROTOCOL] = - url.flags & URL_FLAGS_SPECIAL ? - GetSpecial(env, url.scheme) : - OneByteString(isolate, url.scheme.c_str()); - if (url.flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = Utf8String(isolate, url.username); - if (url.flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = Utf8String(isolate, url.password); - if (url.flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = Utf8String(isolate, url.host); - if (url.flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = Utf8String(isolate, url.query); - if (url.flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment); - if (url.port > -1) - argv[ARG_PORT] = Integer::New(isolate, url.port); - if (url.flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked(); -} - -void Parse(Environment* env, - Local recv, - const char* input, - size_t len, - enum url_parse_state state_override, - Local base_obj, - Local context_obj, - Local cb, - Local error_cb) { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope handle_scope(isolate); - Context::Scope context_scope(context); - - const bool has_context = context_obj->IsObject(); - const bool has_base = base_obj->IsObject(); - - url_data base; - url_data url; - if (has_context) - url = HarvestContext(env, context_obj.As()); - if (has_base) - base = HarvestBase(env, base_obj.As()); - URL::Parse(input, len, state_override, &url, has_context, &base, has_base); - if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || - ((state_override != kUnknownState) && - (url.flags 
& URL_FLAGS_TERMINATED))) - return; - - // Define the return value placeholders const Local undef = Undefined(isolate); - const Local null = Null(isolate); - if (!(url.flags & URL_FLAGS_FAILED)) { - Local argv[] = { + Local argv[] = { undef, undef, undef, undef, - null, // host defaults to null - null, // port defaults to null undef, - null, // query defaults to null - null, // fragment defaults to null - }; - SetArgs(env, argv, url); - USE(cb->Call(context, recv, arraysize(argv), argv)); - } else if (error_cb->IsFunction()) { - Local flags = Integer::NewFromUnsigned(isolate, url.flags); - USE(error_cb.As()->Call(context, recv, 1, &flags)); - } -} - -void Parse(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 5); - CHECK(args[0]->IsString()); // input - CHECK(args[2]->IsUndefined() || // base context - args[2]->IsNull() || - args[2]->IsObject()); - CHECK(args[3]->IsUndefined() || // context - args[3]->IsNull() || - args[3]->IsObject()); - CHECK(args[4]->IsFunction()); // complete callback - CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback - - Utf8Value input(env->isolate(), args[0]); - enum url_parse_state state_override = kUnknownState; - if (args[1]->IsNumber()) { - state_override = static_cast( - args[1]->Uint32Value(env->context()).FromJust()); - } - - Parse(env, args.This(), - *input, input.length(), - state_override, - args[2], - args[3], - args[4].As(), - args[5]); -} - -void EncodeAuthSet(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - std::string output; - size_t len = value.length(); - output.reserve(len); - for (size_t n = 0; n < len; n++) { - const char ch = (*value)[n]; - AppendOrEscape(&output, ch, USERINFO_ENCODE_SET); - } - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), 
output.c_str()).ToLocalChecked()); -} - -void DomainToASCII(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - - URLHost host; - // Assuming the host is used for a special scheme. - host.ParseHost(*value, value.length(), true); - if (host.ParsingFailed()) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; - } - std::string out = host.ToStringMove(); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked()); -} - -void DomainToUnicode(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - - URLHost host; - // Assuming the host is used for a special scheme. - host.ParseHost(*value, value.length(), true, true); - if (host.ParsingFailed()) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; - } - std::string out = host.ToStringMove(); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked()); + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, out); + USE(success_callback_->Call( + env->context(), args.This(), arraysize(argv), argv)); + args.GetReturnValue().Set(result); } void Initialize(Local target, @@ -1826,38 +272,29 @@ void Initialize(Local target, Local context, void* priv) { SetMethod(context, target, "parse", Parse); - SetMethodNoSideEffect(context, target, "encodeAuth", EncodeAuthSet); + SetMethod(context, target, "updateUrl", UpdateUrl); + SetMethodNoSideEffect(context, target, "domainToASCII", DomainToASCII); SetMethodNoSideEffect(context, target, "domainToUnicode", DomainToUnicode); - -#define XX(name, _) NODE_DEFINE_CONSTANT(target, name); - FLAGS(XX) -#undef XX - -#define XX(name) 
NODE_DEFINE_CONSTANT(target, name); - PARSESTATES(XX) -#undef XX } } // namespace void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(Parse); - registry->Register(EncodeAuthSet); + registry->Register(UpdateUrl); + registry->Register(DomainToASCII); registry->Register(DomainToUnicode); } -URL URL::FromFilePath(const std::string& file_path) { - URL url("file://"); +std::string FromFilePath(const std::string_view file_path) { std::string escaped_file_path; for (size_t i = 0; i < file_path.length(); ++i) { escaped_file_path += file_path[i]; - if (file_path[i] == '%') - escaped_file_path += "25"; + if (file_path[i] == '%') escaped_file_path += "25"; } - URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart, - &url.context_, true, nullptr, false); - return url; + + return ada::href_from_file(escaped_file_path); } } // namespace url diff --git a/src/node_url.h b/src/node_url.h index bec281661e6f5e1..c3d895d2f6092f8 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -3,196 +3,18 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS +#include "ada.h" #include "node.h" +#include "util.h" #include namespace node { namespace url { -#define PARSESTATES(XX) \ - XX(kSchemeStart) \ - XX(kScheme) \ - XX(kNoScheme) \ - XX(kSpecialRelativeOrAuthority) \ - XX(kPathOrAuthority) \ - XX(kRelative) \ - XX(kRelativeSlash) \ - XX(kSpecialAuthoritySlashes) \ - XX(kSpecialAuthorityIgnoreSlashes) \ - XX(kAuthority) \ - XX(kHost) \ - XX(kHostname) \ - XX(kPort) \ - XX(kFile) \ - XX(kFileSlash) \ - XX(kFileHost) \ - XX(kPathStart) \ - XX(kPath) \ - XX(kCannotBeBase) \ - XX(kQuery) \ - XX(kFragment) - -#define FLAGS(XX) \ - XX(URL_FLAGS_NONE, 0) \ - XX(URL_FLAGS_FAILED, 0x01) \ - XX(URL_FLAGS_CANNOT_BE_BASE, 0x02) \ - XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ - XX(URL_FLAGS_TERMINATED, 0x08) \ - XX(URL_FLAGS_SPECIAL, 0x10) \ - XX(URL_FLAGS_HAS_USERNAME, 0x20) \ - XX(URL_FLAGS_HAS_PASSWORD, 0x40) \ - XX(URL_FLAGS_HAS_HOST, 0x80) \ 
- XX(URL_FLAGS_HAS_PATH, 0x100) \ - XX(URL_FLAGS_HAS_QUERY, 0x200) \ - XX(URL_FLAGS_HAS_FRAGMENT, 0x400) \ - XX(URL_FLAGS_IS_DEFAULT_SCHEME_PORT, 0x800) \ - -enum url_parse_state { - kUnknownState = -1, -#define XX(name) name, - PARSESTATES(XX) -#undef XX -}; - -enum url_flags { -#define XX(name, val) name = val, - FLAGS(XX) -#undef XX -}; - -struct url_data { - int32_t flags = URL_FLAGS_NONE; - int port = -1; - std::string scheme; - std::string username; - std::string password; - std::string host; - std::string query; - std::string fragment; - std::vector path; - std::string href; -}; - -namespace table_data { -extern const char hex[1024]; -extern const uint8_t C0_CONTROL_ENCODE_SET[32]; -extern const uint8_t FRAGMENT_ENCODE_SET[32]; -extern const uint8_t PATH_ENCODE_SET[32]; -extern const uint8_t USERINFO_ENCODE_SET[32]; -extern const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32]; -extern const uint8_t QUERY_ENCODE_SET_SPECIAL[32]; -} - -class URL { - public: - static void Parse(const char* input, - size_t len, - enum url_parse_state state_override, - struct url_data* url, - bool has_url, - const struct url_data* base, - bool has_base); - - static std::string SerializeURL(const url_data& url, bool exclude); - - URL(const char* input, const size_t len) { - Parse(input, len, kUnknownState, &context_, false, nullptr, false); - } - - URL(const char* input, const size_t len, const URL* base) { - if (base != nullptr) - Parse(input, len, kUnknownState, - &context_, false, - &(base->context_), true); - else - Parse(input, len, kUnknownState, &context_, false, nullptr, false); - } - - URL(const char* input, const size_t len, - const char* base, const size_t baselen) { - if (base != nullptr && baselen > 0) { - URL _base(base, baselen); - Parse(input, len, kUnknownState, - &context_, false, - &(_base.context_), true); - } else { - Parse(input, len, kUnknownState, &context_, false, nullptr, false); - } - } - - explicit URL(const std::string& input) : - URL(input.c_str(), 
input.length()) {} - - URL(const std::string& input, const URL* base) : - URL(input.c_str(), input.length(), base) {} - - URL(const std::string& input, const URL& base) : - URL(input.c_str(), input.length(), &base) {} - - URL(const std::string& input, const std::string& base) : - URL(input.c_str(), input.length(), base.c_str(), base.length()) {} - - int32_t flags() const { - return context_.flags; - } - - int port() const { - return context_.port; - } - - const std::string& protocol() const { - return context_.scheme; - } - - const std::string& username() const { - return context_.username; - } - - const std::string& password() const { - return context_.password; - } - - const std::string& host() const { - return context_.host; - } - - const std::string& query() const { - return context_.query; - } - - const std::string& fragment() const { - return context_.fragment; - } - - std::string path() const { - std::string ret; - for (const std::string& element : context_.path) { - ret += '/' + element; - } - return ret; - } - - std::string href() const { - return SerializeURL(context_, false); - } - - // Get the file URL from native file system path. 
- static URL FromFilePath(const std::string& file_path); - - URL(const URL&) = default; - URL& operator=(const URL&) = default; - URL(URL&&) = default; - URL& operator=(URL&&) = default; - - URL() : URL("") {} - - private: - url_data context_; -}; +std::string FromFilePath(const std::string_view file_path); } // namespace url - } // namespace node #endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS diff --git a/src/node_url_tables.cc b/src/node_url_tables.cc deleted file mode 100644 index 801badf838dc830..000000000000000 --- a/src/node_url_tables.cc +++ /dev/null @@ -1,448 +0,0 @@ -#include -#include "node_url.h" - -namespace node { -namespace url { -namespace table_data { - -const char hex[1024] = - "%00\0%01\0%02\0%03\0%04\0%05\0%06\0%07\0" - "%08\0%09\0%0A\0%0B\0%0C\0%0D\0%0E\0%0F\0" - "%10\0%11\0%12\0%13\0%14\0%15\0%16\0%17\0" - "%18\0%19\0%1A\0%1B\0%1C\0%1D\0%1E\0%1F\0" - "%20\0%21\0%22\0%23\0%24\0%25\0%26\0%27\0" - "%28\0%29\0%2A\0%2B\0%2C\0%2D\0%2E\0%2F\0" - "%30\0%31\0%32\0%33\0%34\0%35\0%36\0%37\0" - "%38\0%39\0%3A\0%3B\0%3C\0%3D\0%3E\0%3F\0" - "%40\0%41\0%42\0%43\0%44\0%45\0%46\0%47\0" - "%48\0%49\0%4A\0%4B\0%4C\0%4D\0%4E\0%4F\0" - "%50\0%51\0%52\0%53\0%54\0%55\0%56\0%57\0" - "%58\0%59\0%5A\0%5B\0%5C\0%5D\0%5E\0%5F\0" - "%60\0%61\0%62\0%63\0%64\0%65\0%66\0%67\0" - "%68\0%69\0%6A\0%6B\0%6C\0%6D\0%6E\0%6F\0" - "%70\0%71\0%72\0%73\0%74\0%75\0%76\0%77\0" - "%78\0%79\0%7A\0%7B\0%7C\0%7D\0%7E\0%7F\0" - "%80\0%81\0%82\0%83\0%84\0%85\0%86\0%87\0" - "%88\0%89\0%8A\0%8B\0%8C\0%8D\0%8E\0%8F\0" - "%90\0%91\0%92\0%93\0%94\0%95\0%96\0%97\0" - "%98\0%99\0%9A\0%9B\0%9C\0%9D\0%9E\0%9F\0" - "%A0\0%A1\0%A2\0%A3\0%A4\0%A5\0%A6\0%A7\0" - "%A8\0%A9\0%AA\0%AB\0%AC\0%AD\0%AE\0%AF\0" - "%B0\0%B1\0%B2\0%B3\0%B4\0%B5\0%B6\0%B7\0" - "%B8\0%B9\0%BA\0%BB\0%BC\0%BD\0%BE\0%BF\0" - "%C0\0%C1\0%C2\0%C3\0%C4\0%C5\0%C6\0%C7\0" - "%C8\0%C9\0%CA\0%CB\0%CC\0%CD\0%CE\0%CF\0" - "%D0\0%D1\0%D2\0%D3\0%D4\0%D5\0%D6\0%D7\0" - "%D8\0%D9\0%DA\0%DB\0%DC\0%DD\0%DE\0%DF\0" - 
"%E0\0%E1\0%E2\0%E3\0%E4\0%E5\0%E6\0%E7\0" - "%E8\0%E9\0%EA\0%EB\0%EC\0%ED\0%EE\0%EF\0" - "%F0\0%F1\0%F2\0%F3\0%F4\0%F5\0%F6\0%F7\0" - "%F8\0%F9\0%FA\0%FB\0%FC\0%FD\0%FE\0%FF"; - -const uint8_t C0_CONTROL_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 
0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -const uint8_t FRAGMENT_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 
5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - - -const uint8_t PATH_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - 
// 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD 
CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -const uint8_t USERINFO_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 
0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 
0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded. -const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x80, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 
0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -} // namespace table_data -} // namespace url -} // namespace node diff --git a/test/benchmark/test-benchmark-url.js b/test/benchmark/test-benchmark-url.js index 664e7c4d8dc8279..f4eb4efa2345990 100644 --- a/test/benchmark/test-benchmark-url.js +++ b/test/benchmark/test-benchmark-url.js @@ -1,6 +1,18 @@ 'use strict'; -require('../common'); +const common = require('../common'); + +// TODO(@anonrig): Remove this check when Ada removes ICU requirement. +if (!common.hasIntl) { + // A handful of the benchmarks fail when ICU is not included. + // ICU is responsible for ignoring certain inputs from the hostname + // and without it, it is not possible to validate the correctness of the input. + // DomainToASCII method in Unicode specification states which characters are + // ignored and/or remapped. Doing this outside of the scope of DomainToASCII, + // would be a violation of the WHATWG URL specification. 
+ // Please look into: https://unicode.org/reports/tr46/#ProcessingStepMap + common.skip('missing Intl'); +} const runBenchmark = require('../common/benchmark'); diff --git a/test/cctest/test_url.cc b/test/cctest/test_url.cc deleted file mode 100644 index 080129b3ddd1ab0..000000000000000 --- a/test/cctest/test_url.cc +++ /dev/null @@ -1,188 +0,0 @@ -#include "node_url.h" -#include "node_i18n.h" -#include "util-inl.h" - -#include "gtest/gtest.h" - -using node::url::URL; -using node::url::URL_FLAGS_FAILED; - -class URLTest : public ::testing::Test { - protected: - void SetUp() override { -#if defined(NODE_HAVE_I18N_SUPPORT) - std::string icu_data_dir; - node::i18n::InitializeICUDirectory(icu_data_dir); -#endif - } - - void TearDown() override {} -}; - -TEST_F(URLTest, Simple) { - URL simple("https://example.org:81/a/b/c?query#fragment"); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "https:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.port(), 81); - EXPECT_EQ(simple.path(), "/a/b/c"); - EXPECT_EQ(simple.query(), "query"); - EXPECT_EQ(simple.fragment(), "fragment"); -} - -TEST_F(URLTest, Simple2) { - const char* input = "https://example.org:81/a/b/c?query#fragment"; - URL simple(input, strlen(input)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "https:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.port(), 81); - EXPECT_EQ(simple.path(), "/a/b/c"); - EXPECT_EQ(simple.query(), "query"); - EXPECT_EQ(simple.fragment(), "fragment"); -} - -TEST_F(URLTest, ForbiddenHostCodePoint) { - URL error("https://exa|mple.org:81/a/b/c?query#fragment"); - EXPECT_TRUE(error.flags() & URL_FLAGS_FAILED); -} - -TEST_F(URLTest, NoBase1) { - URL error("123noscheme"); - EXPECT_TRUE(error.flags() & URL_FLAGS_FAILED); -} - -TEST_F(URLTest, Base1) { - URL base("http://example.org/foo/bar"); - ASSERT_FALSE(base.flags() & URL_FLAGS_FAILED); - - URL simple("../baz", &base); - 
EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/baz"); -} - -TEST_F(URLTest, Base2) { - URL simple("../baz", "http://example.org/foo/bar"); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/baz"); -} - -TEST_F(URLTest, Base3) { - const char* input = "../baz"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/baz"); -} - -TEST_F(URLTest, Base4) { - const char* input = "\\x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/x"); -} - -TEST_F(URLTest, Base5) { - const char* input = "/x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "example.org"); - EXPECT_EQ(simple.path(), "/x"); -} - -TEST_F(URLTest, Base6) { - const char* input = "\\\\x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "x"); -} - -TEST_F(URLTest, Base7) { - const char* input = "//x"; - const char* base = "http://example.org/foo/bar"; - - URL simple(input, strlen(input), base, strlen(base)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - 
EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), "x"); -} - -TEST_F(URLTest, TruncatedAfterProtocol) { - char input[2] = { 'q', ':' }; - URL simple(input, sizeof(input)); - - EXPECT_FALSE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "q:"); - EXPECT_EQ(simple.host(), ""); - EXPECT_EQ(simple.path(), "/"); -} - -TEST_F(URLTest, TruncatedAfterProtocol2) { - char input[6] = { 'h', 't', 't', 'p', ':', '/' }; - URL simple(input, sizeof(input)); - - EXPECT_TRUE(simple.flags() & URL_FLAGS_FAILED); - EXPECT_EQ(simple.protocol(), "http:"); - EXPECT_EQ(simple.host(), ""); - EXPECT_EQ(simple.path(), ""); -} - -TEST_F(URLTest, FromFilePath) { - URL file_url; -#ifdef _WIN32 - file_url = URL::FromFilePath("C:\\Program Files\\"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//C:/Program%20Files/", file_url.path()); - EXPECT_EQ("file:///C:/Program%20Files/", file_url.href()); - - file_url = URL::FromFilePath("C:\\a\\b\\c"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//C:/a/b/c", file_url.path()); - EXPECT_EQ("file:///C:/a/b/c", file_url.href()); - - file_url = URL::FromFilePath("b:\\a\\%%.js"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//b:/a/%25%25.js", file_url.path()); - EXPECT_EQ("file:///b:/a/%25%25.js", file_url.href()); -#else - file_url = URL::FromFilePath("/"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//", file_url.path()); - EXPECT_EQ("file:///", file_url.href()); - - file_url = URL::FromFilePath("/a/b/c"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//a/b/c", file_url.path()); - EXPECT_EQ("file:///a/b/c", file_url.href()); - - file_url = URL::FromFilePath("/a/%%.js"); - EXPECT_EQ("file:", file_url.protocol()); - EXPECT_EQ("//a/%25%25.js", file_url.path()); - EXPECT_EQ("file:///a/%25%25.js", file_url.href()); -#endif -} diff --git a/test/fuzzers/fuzz_url.cc b/test/fuzzers/fuzz_url.cc deleted file mode 100644 index 16c5f644893f86c..000000000000000 --- 
a/test/fuzzers/fuzz_url.cc +++ /dev/null @@ -1,11 +0,0 @@ -#include - -#include "node.h" -#include "node_internals.h" -#include "node_url.h" - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - node::url::URL url2(reinterpret_cast(data), size); - - return 0; -} diff --git a/test/parallel/test-process-versions.js b/test/parallel/test-process-versions.js index 91ffd644f01a93c..23590d58a587d96 100644 --- a/test/parallel/test-process-versions.js +++ b/test/parallel/test-process-versions.js @@ -18,6 +18,7 @@ const expected_keys = [ 'uvwasi', 'acorn', 'simdutf', + 'ada', ]; const hasUndici = process.config.variables.node_builtin_shareable_builtins.includes('deps/undici/undici.js'); diff --git a/test/parallel/test-whatwg-url-custom-inspect.js b/test/parallel/test-whatwg-url-custom-inspect.js index ad77f5725d30ed6..e64f9991f5f0d9a 100644 --- a/test/parallel/test-whatwg-url-custom-inspect.js +++ b/test/parallel/test-whatwg-url-custom-inspect.js @@ -45,18 +45,20 @@ assert.strictEqual( search: '?que=ry', searchParams: URLSearchParams { 'que' => 'ry' }, hash: '#hash', - cannotBeBase: false, - special: true, [Symbol(context)]: URLContext { - flags: 2032, - scheme: 'https:', + href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', + origin: 'https://host.name:8080', + protocol: 'https:', + host: 'host.name:8080', + hostname: 'host.name', + pathname: '/path/name/', + search: '?que=ry', username: 'username', password: 'password', - host: 'host.name', - port: 8080, - path: [ 'path', 'name', '', [length]: 3 ], - query: 'que=ry', - fragment: 'hash' + port: '8080', + hash: '#hash', + hasHost: true, + hasOpaquePath: false } }`); diff --git a/test/parallel/test-whatwg-url-properties.js b/test/parallel/test-whatwg-url-properties.js index 98a16bdbbdcf6b0..69ce14a431a9b75 100644 --- a/test/parallel/test-whatwg-url-properties.js +++ b/test/parallel/test-whatwg-url-properties.js @@ -1,7 +1,7 @@ 'use strict'; require('../common'); const assert = 
require('assert'); -const { URL, URLSearchParams } = require('url'); +const { URL, URLSearchParams, format } = require('url'); [ { name: 'toString' }, @@ -11,6 +11,17 @@ const { URL, URLSearchParams } = require('url'); testMethod(URL.prototype, name); }); +[ + 'http://www.google.com', + 'https://www.domain.com:443', + 'file:///Users/yagiz/Developer/node', +].forEach((url) => { + const u = new URL(url); + assert.strictEqual(JSON.stringify(u), `"${u.href}"`); + assert.strictEqual(u.toString(), u.href); + assert.strictEqual(format(u), u.href); +}); + [ { name: 'href' }, { name: 'protocol' },