From e2eb11486e785e0c9737d2e89afd941c32ed8bba Mon Sep 17 00:00:00 2001 From: James M Snell Date: Wed, 4 Oct 2023 16:08:18 -0700 Subject: [PATCH] Add ada-url dependency, initial impl of jsg::Url This will serve as the new underlying implementation of the url-standard.{h|c++} class while also supporting handling of module import specifiers as URLs. --- .bazelrc | 1 + .github/secret_scanning.yml | 1 + WORKSPACE | 10 + build/BUILD.ada-url | 12 + compile_flags.txt | 2 + src/workerd/jsg/BUILD.bazel | 12 + src/workerd/jsg/jsg.h | 1 + src/workerd/jsg/url-test-corpus-failures.h | 273 ++++++++++ src/workerd/jsg/url-test-corpus-success.h | 549 +++++++++++++++++++++ src/workerd/jsg/url-test.c++ | 79 +++ src/workerd/jsg/url.c++ | 261 ++++++++++ src/workerd/jsg/url.h | 112 +++++ 12 files changed, 1313 insertions(+) create mode 100644 build/BUILD.ada-url create mode 100644 src/workerd/jsg/url-test-corpus-failures.h create mode 100644 src/workerd/jsg/url-test-corpus-success.h create mode 100644 src/workerd/jsg/url-test.c++ create mode 100644 src/workerd/jsg/url.c++ create mode 100644 src/workerd/jsg/url.h diff --git a/.bazelrc b/.bazelrc index 2d2fba96b3c..32d842a1da2 100644 --- a/.bazelrc +++ b/.bazelrc @@ -189,6 +189,7 @@ build:windows --per_file_copt='external/ssl/src/crypto/poly1305/poly1305_vec\.c@ build:windows --per_file_copt='external/v8/src/objects/literal-objects\.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/v8/src/objects/literal-objects.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' build:windows --per_file_copt='external/v8/src/runtime/runtime-object\.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/v8/src/runtime/runtime-object.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' build:windows --per_file_copt='external/v8/src/objects/swiss-name-dictionary\.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' 
--host_per_file_copt='external/v8/src/objects/swiss-name-dictionary.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' +build:windows --per_file_copt='external/ada-url/ada\.cpp@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/ada-url/ada\.cpp@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' # enable clang coverage: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html build:clang-coverage --copt="-fprofile-instr-generate" --linkopt="-fprofile-instr-generate" diff --git a/.github/secret_scanning.yml b/.github/secret_scanning.yml index f07fd6d91ea..240bae43171 100644 --- a/.github/secret_scanning.yml +++ b/.github/secret_scanning.yml @@ -1,3 +1,4 @@ paths-ignore: - "src/workerd/api/node/crypto_keys-test.js" - "src/workerd/api/node/crypto_dh-test.js" + - "src/workerd/jsg/url-test-corpus-success.h" diff --git a/WORKSPACE b/WORKSPACE index db0d46bd1f9..2f903f140b3 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -76,6 +76,16 @@ http_archive( urls = ["https://github.com/google/brotli/tarball/ec107cf015139c791f79afac0f96c3a2c45e157f"], ) +http_archive( + name = "ada-url", + build_file = "//:build/BUILD.ada-url", + sha256 = "850f5dbe0aa606a1c2f0aaa7feec3c5da6b1e09fb5e5dab9b5554469c7795ef4", + type = "zip", + url = "https://github.com/ada-url/ada/releases/download/v2.6.10/singleheader.zip", + patches = [], + patch_args = ["-p1"], +) + # ======================================================================================== # Dawn # diff --git a/build/BUILD.ada-url b/build/BUILD.ada-url new file mode 100644 index 00000000000..f49fa4073e7 --- /dev/null +++ b/build/BUILD.ada-url @@ -0,0 +1,12 @@ + +cc_library( + name = "ada-url", + hdrs = ["ada.h", "ada_c.h"], + srcs = ["ada.cpp"], + visibility = ["//visibility:public"], + include_prefix = ".", + copts = ["-w"], + defines = [ + "ADA_SSE2=1" + ] +) diff --git a/compile_flags.txt b/compile_flags.txt index 259ea8aee94..1f035e19e5a 100644 --- 
a/compile_flags.txt +++ b/compile_flags.txt @@ -4,9 +4,11 @@ -nostdinc -Ibazel-bin/external/com_googlesource_chromium_base_trace_event_common/_virtual_includes/trace_event_common -Ibazel-bin/external/dawn/include +-Ibazel-bin/external/ada-url/_virtual_includes/ada-url/ -Ibazel-bin/external/com_cloudflare_lol_html/_virtual_includes/lolhtml -Iexternal/com_google_benchmark/include/ -Iexternal/dawn/include +-Iexternal/ada-url/ -Isrc -isystem/usr/include -isystem/usr/include/x86_64-linux-gnu diff --git a/src/workerd/jsg/BUILD.bazel b/src/workerd/jsg/BUILD.bazel index 59792b2393c..4c81065ac73 100644 --- a/src/workerd/jsg/BUILD.bazel +++ b/src/workerd/jsg/BUILD.bazel @@ -26,6 +26,7 @@ wd_cc_library( ":exception", ":modules_capnp", ":observer", + ":url", "//src/workerd/util", "//src/workerd/util:sentry", "//src/workerd/util:thread-scopes", @@ -34,6 +35,17 @@ wd_cc_library( ], ) +wd_cc_library( + name = "url", + srcs = ["url.c++"], + hdrs = ["url.h"], + visibility = ["//visibility:public"], + deps = [ + "@capnp-cpp//src/kj", + "@ada-url", + ], +) + wd_cc_library( name = "exception", srcs = ["exception.c++"], diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h index 92089a8f3fb..c18010d8f54 100644 --- a/src/workerd/jsg/jsg.h +++ b/src/workerd/jsg/jsg.h @@ -2333,3 +2333,4 @@ inline v8::Local JsContext::getHandle(Lock& js) { #include "function.h" #include "iterator.h" #include "jsvalue.h" +#include "url.h" diff --git a/src/workerd/jsg/url-test-corpus-failures.h b/src/workerd/jsg/url-test-corpus-failures.h new file mode 100644 index 00000000000..85aa3efdba2 --- /dev/null +++ b/src/workerd/jsg/url-test-corpus-failures.h @@ -0,0 +1,273 @@ +// Generated from the corpus of failure-case url tests in WPT +KJ_ASSERT(Url::tryParse("http://f:b/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://f: /c"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://f:fifty-two/c"_kj, 
kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://f:999999/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("non-special://f:999999/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://f: 21 / b ? d # e "_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[1::2]:3:4"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://2001::1"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://2001::1]"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://2001::1]:80"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::127.0.0.1.]"_kj, kj::Maybe("http://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("file://example:1/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://example:test/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://example%/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://[example]/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://user:pass@/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo:-80/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:/:@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://user@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:/@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https:@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:a:b@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:/a:b@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a:b@/www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http::@/www.example.com"_kj) 
== kj::none); +KJ_ASSERT(Url::tryParse("http:@:www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:/@:www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://@:www.example.com"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://example example.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://Goo%20 goo%7C|.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[:]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://GOO  goo.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://﷐zyx.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%ef%b7%90zyx.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("https://�"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://%EF%BF%BD"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a.b.c.xn--pokxncvks"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://10.0.0.xn--pokxncvks"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a.b.c.XN--pokxncvks"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a.b.c.Xn--pokxncvks"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://10.0.0.XN--pokxncvks"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://10.0.0.xN--pokxncvks"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://%41.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%ef%bc%85%ef%bc%94%ef%bc%91.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%00.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%ef%bc%85%ef%bc%90%ef%bc%90.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%zz%66%a.com"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); 
+KJ_ASSERT(Url::tryParse("http://%25"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://hello%00"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://192.168.0.257"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%3g%78%63%30%2e%30%32%35%30%2E.01"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://192.168.0.1 hello"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("https://x x:12"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://[www.google.com]/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://[google.com]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::1.2.3.4x]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::1.2.3.]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::1.2.]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::.1.2]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::1.]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::.1]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::%31]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://%5B::1]"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("i"_kj, kj::Maybe("sc:sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("i"_kj, kj::Maybe("sc:sd/sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("../i"_kj, kj::Maybe("sc:sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("../i"_kj, kj::Maybe("sc:sd/sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("/i"_kj, kj::Maybe("sc:sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("/i"_kj, kj::Maybe("sc:sd/sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("?i"_kj, 
kj::Maybe("sc:sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("?i"_kj, kj::Maybe("sc:sd/sd"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("sc://@/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://te@s:t@/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://:/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://:12/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a\0b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://ab"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a[b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a\\b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a]b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a^b"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("sc://a|b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a\0b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == 
kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a%b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a[b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a]b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a^b"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://a|b/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ab/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%00st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%01st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%02st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%03st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%04st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%05st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%06st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%07st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%08st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%09st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%0Ast/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%0Bst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%0Cst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%0Dst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%0Est/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%0Fst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%10st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%11st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%12st/"_kj) 
== kj::none); +KJ_ASSERT(Url::tryParse("http://ho%13st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%14st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%15st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%16st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%17st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%18st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%19st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%1Ast/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%1Bst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%1Cst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%1Dst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%1Est/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%1Fst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%20st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%23st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%25st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%2Fst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%3Ast/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%3Cst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%3Est/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%3Fst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%40st/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%5Bst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%5Cst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%5Dst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%7Cst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://ho%7Fst/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("ftp://example.com%80/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("ftp://example.com%A0/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://example.com%80/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://example.com%A0/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http:"_kj, 
kj::Maybe("https://example.org/foo/bar"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://10000000000"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://4294967296"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://0xffffffff1"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://256.256.256.256"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("https://0x100000000/test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://256.0.0.1/test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://%43%3A"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://%43%7C"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://%43|"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://C%7C"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://%43%7C/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://%43%7C/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("asdf://%43|/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("\\\\\\.\\Y:"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("\\\\\\.\\y:"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://[0:1:2:3:4:5:6:7:8]"_kj, kj::Maybe("http://example.net/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0::0::0]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0:.0]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0:0:]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0:1:2:3:4:5:6:7.0.0.0.1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0:1.00.0.0.0]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0:1.290.0.0.0]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://[0:1.23.23]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://?"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://#"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://f:4294967377/c"_kj, kj::Maybe("http://example.org/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://f:18446744073709551697/c"_kj, 
kj::Maybe("http://example.org/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://f:340282366920938463463374607431768211537/c"_kj, kj::Maybe("http://example.org/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("non-special://[:80/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://[::127.0.0.0.1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("a"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("a/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("a//"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("test-a-colon.html"_kj, kj::Maybe("a:"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("test-a-colon-b.html"_kj, kj::Maybe("a:b"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("file://­/p"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://%C2%AD/p"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("file://xn--/p"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("#"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("?"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://1.2.3.4.5"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://1.2.3.4.5."_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://0..0x300/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://0..0x300./"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://256.256.256.256.256"_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://256.256.256.256.256."_kj, kj::Maybe("http://other.com/"_kj)) == kj::none); +KJ_ASSERT(Url::tryParse("http://1.2.3.08"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://1.2.3.08."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://1.2.3.09"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://09.2.3.4"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://09.2.3.4."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://01.2.3.4.5"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://01.2.3.4.5."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://0x100.2.3.4"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://0x100.2.3.4."_kj) == kj::none); 
+KJ_ASSERT(Url::tryParse("http://0x1.2.3.4.5"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://0x1.2.3.4.5."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.1.2.3.4"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.1.2.3.4."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.2.3.4"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.2.3.4."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.09"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.09."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.0x4"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.0x4."_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://0999999999999999999/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.0x"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://foo.0XFfFfFfFfFfFfFfFfFfAcE123"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("http://💩.123/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://\0y"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://￿y"_kj) == kj::none); +KJ_ASSERT(Url::tryParse(""_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://­/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://%C2%AD/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("https://xn--/"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("data://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("data://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("data://[:1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("javascript://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("javascript://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("javascript://[:1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("mailto://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("mailto://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("mailto://[:1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("intent://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("intent://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("intent://[:1]"_kj) == kj::none); 
+KJ_ASSERT(Url::tryParse("urn://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("urn://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("urn://[:1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("turn://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("turn://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("turn://[:1]"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("stun://:443"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("stun://test:test"_kj) == kj::none); +KJ_ASSERT(Url::tryParse("stun://[:1]"_kj) == kj::none); diff --git a/src/workerd/jsg/url-test-corpus-success.h b/src/workerd/jsg/url-test-corpus-success.h new file mode 100644 index 00000000000..99cb7cc5896 --- /dev/null +++ b/src/workerd/jsg/url-test-corpus-success.h @@ -0,0 +1,549 @@ +// Generated from the corpus of success-case url tests in WPT +test("http://example\t.\norg"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/"_kj); +test("http://user:pass@foo:21/bar;par?b#c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://user:pass@foo:21/bar;par?b#c"_kj); +test("https://test:@test"_kj, kj::none, "https://test@test/"_kj); +test("https://:@test"_kj, kj::none, "https://test/"_kj); +test("non-special://test:@test/x"_kj, kj::none, "non-special://test@test/x"_kj); +test("non-special://:@test/x"_kj, kj::none, "non-special://test/x"_kj); +test("http:foo.com"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/foo.com"_kj); +test("\t :foo.com \n"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:foo.com"_kj); +test(" foo.com "_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/foo.com"_kj); +test("a:\t foo.com"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "a: foo.com"_kj); +test("http://f:21/ b ? 
d # e "_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://f:21/%20b%20?%20d%20#%20e"_kj); +test("lolscheme:x x#x x"_kj, kj::none, "lolscheme:x x#x%20x"_kj); +test("http://f:/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://f/c"_kj); +test("http://f:0/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://f:0/c"_kj); +test("http://f:00000000000000/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://f:0/c"_kj); +test("http://f:00000000000000000000080/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://f/c"_kj); +test("http://f:\n/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://f/c"_kj); +test(""_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar"_kj); +test(" \t"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar"_kj); +test(":foo.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:foo.com/"_kj); +test(":foo.com\\"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:foo.com/"_kj); +test(":"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:"_kj); +test(":a"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:a"_kj); +test(":/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:/"_kj); +test(":\\"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:/"_kj); +test(":#"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:#"_kj); +test("#"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar#"_kj); +test("#/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar#/"_kj); +test("#\\"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar#\\"_kj); +test("#;?"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar#;?"_kj); +test("?"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar?"_kj); +test("/"_kj, 
kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/"_kj); +test(":23"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:23"_kj); +test("/:23"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/:23"_kj); +test("\\x"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/x"_kj); +test("\\\\x\\hello"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://x/hello"_kj); +test("::"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/::"_kj); +test("::23"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/::23"_kj); +test("foo://"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "foo://"_kj); +test("http://a:b@c:29/d"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://a:b@c:29/d"_kj); +test("http::@c:29"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/:@c:29"_kj); +test("http://&a:foo(b]c@d:2/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://&a:foo(b%5Dc@d:2/"_kj); +test("http://::@c@d:2"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://:%3A%40c@d:2/"_kj); +test("http://foo.com:b@d/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo.com:b@d/"_kj); +test("http://foo.com/\\@"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo.com//@"_kj); +test("http:\\\\foo.com\\"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo.com/"_kj); +test("http:\\\\a\\b:c\\d@foo.com\\"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://a/b:c/d@foo.com/"_kj); +test("foo:/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "foo:/"_kj); +test("foo:/bar.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "foo:/bar.com/"_kj); +test("foo://///////"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "foo://///////"_kj); +test("foo://///////bar.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "foo://///////bar.com/"_kj); +test("foo:////://///"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "foo:////://///"_kj); 
+test("c:/foo"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "c:/foo"_kj); +test("//foo/bar"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo/bar"_kj); +test("http://foo/path;a??e#f#g"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo/path;a??e#f#g"_kj); +test("http://foo/abcd?efgh?ijkl"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo/abcd?efgh?ijkl"_kj); +test("http://foo/abcd#foo?bar"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://foo/abcd#foo?bar"_kj); +test("[61:24:74]:98"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/[61:24:74]:98"_kj); +test("http:[61:27]/:foo"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/[61:27]/:foo"_kj); +test("http://[2001::1]"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://[2001::1]/"_kj); +test("http://[::127.0.0.1]"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://[::7f00:1]/"_kj); +test("http://[0:0:0:0:0:0:13.1.68.3]"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://[::d01:4403]/"_kj); +test("http://[2001::1]:80"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://[2001::1]/"_kj); +test("http:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/example.com/"_kj); +test("ftp:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "ftp://example.com/"_kj); +test("https:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "https://example.com/"_kj); +test("madeupscheme:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "madeupscheme:/example.com/"_kj); +test("file:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "file:///example.com/"_kj); +test("ftps:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "ftps:/example.com/"_kj); +test("gopher:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "gopher:/example.com/"_kj); +test("ws:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "ws://example.com/"_kj); 
+test("wss:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "wss://example.com/"_kj); +test("data:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "data:/example.com/"_kj); +test("javascript:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "javascript:/example.com/"_kj); +test("mailto:/example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "mailto:/example.com/"_kj); +test("http:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/example.com/"_kj); +test("ftp:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "ftp://example.com/"_kj); +test("https:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "https://example.com/"_kj); +test("madeupscheme:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "madeupscheme:example.com/"_kj); +test("ftps:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "ftps:example.com/"_kj); +test("gopher:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "gopher:example.com/"_kj); +test("ws:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "ws://example.com/"_kj); +test("wss:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "wss://example.com/"_kj); +test("data:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "data:example.com/"_kj); +test("javascript:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "javascript:example.com/"_kj); +test("mailto:example.com/"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "mailto:example.com/"_kj); +test("/a/b/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/a/b/c"_kj); +test("/a/ /c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/a/%20/c"_kj); +test("/a%2fc"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/a%2fc"_kj); +test("/a/%2f/c"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/a/%2f/c"_kj); +test("#β"_kj, 
kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar#%CE%B2"_kj); +test("data:text/html,test#test"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "data:text/html,test#test"_kj); +test("tel:1234567890"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "tel:1234567890"_kj); +test("ssh://example.com/foo/bar.git"_kj, kj::Maybe("http://example.org/"_kj), "ssh://example.com/foo/bar.git"_kj); +test("file:c:\\foo\\bar.html"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///c:/foo/bar.html"_kj); +test(" File:c|////foo\\bar.html"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///c:////foo/bar.html"_kj); +test("C|/foo/bar"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///C:/foo/bar"_kj); +test("/C|\\foo\\bar"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///C:/foo/bar"_kj); +test("//C|/foo/bar"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///C:/foo/bar"_kj); +test("//server/file"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file://server/file"_kj); +test("\\\\server\\file"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file://server/file"_kj); +test("/\\server/file"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file://server/file"_kj); +test("file:///foo/bar.txt"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///foo/bar.txt"_kj); +test("file:///home/me"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///home/me"_kj); +test("//"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///"_kj); +test("///"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///"_kj); +test("///test"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///test"_kj); +test("file://test"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file://test/"_kj); +test("file://localhost"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///"_kj); +test("file://localhost/"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///"_kj); +test("file://localhost/test"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///test"_kj); +test("test"_kj, kj::Maybe("file:///tmp/mock/path"_kj), 
"file:///tmp/mock/test"_kj); +test("file:test"_kj, kj::Maybe("file:///tmp/mock/path"_kj), "file:///tmp/mock/test"_kj); +test("http://example.com/././foo"_kj, kj::none, "http://example.com/foo"_kj); +test("http://example.com/./.foo"_kj, kj::none, "http://example.com/.foo"_kj); +test("http://example.com/foo/."_kj, kj::none, "http://example.com/foo/"_kj); +test("http://example.com/foo/./"_kj, kj::none, "http://example.com/foo/"_kj); +test("http://example.com/foo/bar/.."_kj, kj::none, "http://example.com/foo/"_kj); +test("http://example.com/foo/bar/../"_kj, kj::none, "http://example.com/foo/"_kj); +test("http://example.com/foo/..bar"_kj, kj::none, "http://example.com/foo/..bar"_kj); +test("http://example.com/foo/bar/../ton"_kj, kj::none, "http://example.com/foo/ton"_kj); +test("http://example.com/foo/bar/../ton/../../a"_kj, kj::none, "http://example.com/a"_kj); +test("http://example.com/foo/../../.."_kj, kj::none, "http://example.com/"_kj); +test("http://example.com/foo/../../../ton"_kj, kj::none, "http://example.com/ton"_kj); +test("http://example.com/foo/%2e"_kj, kj::none, "http://example.com/foo/"_kj); +test("http://example.com/foo/%2e%2"_kj, kj::none, "http://example.com/foo/%2e%2"_kj); +test("http://example.com/foo/%2e./%2e%2e/.%2e/%2e.bar"_kj, kj::none, "http://example.com/%2e.bar"_kj); +test("http://example.com////../.."_kj, kj::none, "http://example.com//"_kj); +test("http://example.com/foo/bar//../.."_kj, kj::none, "http://example.com/foo/"_kj); +test("http://example.com/foo/bar//.."_kj, kj::none, "http://example.com/foo/bar/"_kj); +test("http://example.com/foo"_kj, kj::none, "http://example.com/foo"_kj); +test("http://example.com/%20foo"_kj, kj::none, "http://example.com/%20foo"_kj); +test("http://example.com/foo%"_kj, kj::none, "http://example.com/foo%"_kj); +test("http://example.com/foo%2"_kj, kj::none, "http://example.com/foo%2"_kj); +test("http://example.com/foo%2zbar"_kj, kj::none, "http://example.com/foo%2zbar"_kj); 
+test("http://example.com/foo%2©zbar"_kj, kj::none, "http://example.com/foo%2%C3%82%C2%A9zbar"_kj); +test("http://example.com/foo%41%7a"_kj, kj::none, "http://example.com/foo%41%7a"_kj); +test("http://example.com/foo\t‘%91"_kj, kj::none, "http://example.com/foo%C2%91%91"_kj); +test("http://example.com/foo%00%51"_kj, kj::none, "http://example.com/foo%00%51"_kj); +test("http://example.com/(%28:%3A%29)"_kj, kj::none, "http://example.com/(%28:%3A%29)"_kj); +test("http://example.com/%3A%3a%3C%3c"_kj, kj::none, "http://example.com/%3A%3a%3C%3c"_kj); +test("http://example.com/foo\tbar"_kj, kj::none, "http://example.com/foobar"_kj); +test("http://example.com\\\\foo\\\\bar"_kj, kj::none, "http://example.com//foo//bar"_kj); +test("http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd"_kj, kj::none, "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd"_kj); +test("http://example.com/@asdf%40"_kj, kj::none, "http://example.com/@asdf%40"_kj); +test("http://example.com/你好你好"_kj, kj::none, "http://example.com/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD"_kj); +test("http://example.com/‥/foo"_kj, kj::none, "http://example.com/%E2%80%A5/foo"_kj); +test("http://example.com//foo"_kj, kj::none, "http://example.com/%EF%BB%BF/foo"_kj); +test("http://example.com/‮/foo/‭/bar"_kj, kj::none, "http://example.com/%E2%80%AE/foo/%E2%80%AD/bar"_kj); +test("http://www.google.com/foo?bar=baz#"_kj, kj::none, "http://www.google.com/foo?bar=baz#"_kj); +test("http://www.google.com/foo?bar=baz# »"_kj, kj::none, "http://www.google.com/foo?bar=baz#%20%C2%BB"_kj); +test("data:test# »"_kj, kj::none, "data:test#%20%C2%BB"_kj); +test("http://www.google.com"_kj, kj::none, "http://www.google.com/"_kj); +test("http://192.0x00A80001"_kj, kj::none, "http://192.168.0.1/"_kj); +test("http://www/foo%2Ehtml"_kj, kj::none, "http://www/foo%2Ehtml"_kj); +test("http://www/foo/%2E/html"_kj, kj::none, "http://www/foo/html"_kj); +test("http://%25DOMAIN:foobar@foodomain.com/"_kj, kj::none, "http://%25DOMAIN:foobar@foodomain.com/"_kj); 
+test("http:\\\\www.google.com\\foo"_kj, kj::none, "http://www.google.com/foo"_kj); +test("http://foo:80/"_kj, kj::none, "http://foo/"_kj); +test("http://foo:81/"_kj, kj::none, "http://foo:81/"_kj); +test("httpa://foo:80/"_kj, kj::none, "httpa://foo:80/"_kj); +test("https://foo:443/"_kj, kj::none, "https://foo/"_kj); +test("https://foo:80/"_kj, kj::none, "https://foo:80/"_kj); +test("ftp://foo:21/"_kj, kj::none, "ftp://foo/"_kj); +test("ftp://foo:80/"_kj, kj::none, "ftp://foo:80/"_kj); +test("gopher://foo:70/"_kj, kj::none, "gopher://foo:70/"_kj); +test("gopher://foo:443/"_kj, kj::none, "gopher://foo:443/"_kj); +test("ws://foo:80/"_kj, kj::none, "ws://foo/"_kj); +test("ws://foo:81/"_kj, kj::none, "ws://foo:81/"_kj); +test("ws://foo:443/"_kj, kj::none, "ws://foo:443/"_kj); +test("ws://foo:815/"_kj, kj::none, "ws://foo:815/"_kj); +test("wss://foo:80/"_kj, kj::none, "wss://foo:80/"_kj); +test("wss://foo:81/"_kj, kj::none, "wss://foo:81/"_kj); +test("wss://foo:443/"_kj, kj::none, "wss://foo/"_kj); +test("wss://foo:815/"_kj, kj::none, "wss://foo:815/"_kj); +test("http:/example.com/"_kj, kj::none, "http://example.com/"_kj); +test("ftp:/example.com/"_kj, kj::none, "ftp://example.com/"_kj); +test("https:/example.com/"_kj, kj::none, "https://example.com/"_kj); +test("madeupscheme:/example.com/"_kj, kj::none, "madeupscheme:/example.com/"_kj); +test("file:/example.com/"_kj, kj::none, "file:///example.com/"_kj); +test("ftps:/example.com/"_kj, kj::none, "ftps:/example.com/"_kj); +test("gopher:/example.com/"_kj, kj::none, "gopher:/example.com/"_kj); +test("ws:/example.com/"_kj, kj::none, "ws://example.com/"_kj); +test("wss:/example.com/"_kj, kj::none, "wss://example.com/"_kj); +test("data:/example.com/"_kj, kj::none, "data:/example.com/"_kj); +test("javascript:/example.com/"_kj, kj::none, "javascript:/example.com/"_kj); +test("mailto:/example.com/"_kj, kj::none, "mailto:/example.com/"_kj); +test("http:example.com/"_kj, kj::none, "http://example.com/"_kj); 
+test("ftp:example.com/"_kj, kj::none, "ftp://example.com/"_kj); +test("https:example.com/"_kj, kj::none, "https://example.com/"_kj); +test("madeupscheme:example.com/"_kj, kj::none, "madeupscheme:example.com/"_kj); +test("ftps:example.com/"_kj, kj::none, "ftps:example.com/"_kj); +test("gopher:example.com/"_kj, kj::none, "gopher:example.com/"_kj); +test("ws:example.com/"_kj, kj::none, "ws://example.com/"_kj); +test("wss:example.com/"_kj, kj::none, "wss://example.com/"_kj); +test("data:example.com/"_kj, kj::none, "data:example.com/"_kj); +test("javascript:example.com/"_kj, kj::none, "javascript:example.com/"_kj); +test("mailto:example.com/"_kj, kj::none, "mailto:example.com/"_kj); +test("http:@www.example.com"_kj, kj::none, "http://www.example.com/"_kj); +test("http:/@www.example.com"_kj, kj::none, "http://www.example.com/"_kj); +test("http://@www.example.com"_kj, kj::none, "http://www.example.com/"_kj); +test("http:a:b@www.example.com"_kj, kj::none, "http://a:b@www.example.com/"_kj); +test("http:/a:b@www.example.com"_kj, kj::none, "http://a:b@www.example.com/"_kj); +test("http://a:b@www.example.com"_kj, kj::none, "http://a:b@www.example.com/"_kj); +test("http://@pple.com"_kj, kj::none, "http://pple.com/"_kj); +test("http::b@www.example.com"_kj, kj::none, "http://:b@www.example.com/"_kj); +test("http:/:b@www.example.com"_kj, kj::none, "http://:b@www.example.com/"_kj); +test("http://:b@www.example.com"_kj, kj::none, "http://:b@www.example.com/"_kj); +test("http:a:@www.example.com"_kj, kj::none, "http://a@www.example.com/"_kj); +test("http:/a:@www.example.com"_kj, kj::none, "http://a@www.example.com/"_kj); +test("http://a:@www.example.com"_kj, kj::none, "http://a@www.example.com/"_kj); +test("http://www.@pple.com"_kj, kj::none, "http://www.@pple.com/"_kj); +test("http://:@www.example.com"_kj, kj::none, "http://www.example.com/"_kj); +test("/"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/"_kj); +test("/test.txt"_kj, 
kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/test.txt"_kj); +test("."_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/"_kj); +test(".."_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/"_kj); +test("test.txt"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/test.txt"_kj); +test("./test.txt"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/test.txt"_kj); +test("../test.txt"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/test.txt"_kj); +test("../aaa/test.txt"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/aaa/test.txt"_kj); +test("../../test.txt"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/test.txt"_kj); +test("中/test.txt"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example.com/%E4%B8%AD/test.txt"_kj); +test("http://www.example2.com"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example2.com/"_kj); +test("//www.example2.com"_kj, kj::Maybe("http://www.example.com/test"_kj), "http://www.example2.com/"_kj); +test("file:..."_kj, kj::Maybe("http://www.example.com/test"_kj), "file:///..."_kj); +test("file:.."_kj, kj::Maybe("http://www.example.com/test"_kj), "file:///"_kj); +test("file:a"_kj, kj::Maybe("http://www.example.com/test"_kj), "file:///a"_kj); +test("http://ExAmPlE.CoM"_kj, kj::Maybe("http://other.com/"_kj), "http://example.com/"_kj); +test("http://GOO​⁠goo.com"_kj, kj::Maybe("http://other.com/"_kj), "http://googoo.com/"_kj); +test("\0 http://example.com/ \r "_kj, kj::none, "http://example.com/"_kj); +test("http://www.foo。bar.com"_kj, kj::Maybe("http://other.com/"_kj), "http://www.foo.bar.com/"_kj); +test("https://x/�?�#�"_kj, kj::none, "https://x/%EF%BF%BD?%EF%BF%BD#%EF%BF%BD"_kj); +test("http://Go.com"_kj, kj::Maybe("http://other.com/"_kj), "http://go.com/"_kj); +test("http://你好你好"_kj, kj::Maybe("http://other.com/"_kj), 
"http://xn--6qqa088eba/"_kj); +test("https://faß.ExAmPlE/"_kj, kj::none, "https://xn--fa-hia.example/"_kj); +test("sc://faß.ExAmPlE/"_kj, kj::none, "sc://fa%C3%9F.ExAmPlE/"_kj); +test("http://%30%78%63%30%2e%30%32%35%30.01"_kj, kj::Maybe("http://other.com/"_kj), "http://192.168.0.1/"_kj); +test("http://%30%78%63%30%2e%30%32%35%30.01%2e"_kj, kj::Maybe("http://other.com/"_kj), "http://192.168.0.1/"_kj); +test("http://0Xc0.0250.01"_kj, kj::Maybe("http://other.com/"_kj), "http://192.168.0.1/"_kj); +test("http://./"_kj, kj::none, "http://./"_kj); +test("http://../"_kj, kj::none, "http://../"_kj); +test("h://."_kj, kj::none, "h://."_kj); +test("http://foo:💩@example.com/bar"_kj, kj::Maybe("http://other.com/"_kj), "http://foo:%F0%9F%92%A9@example.com/bar"_kj); +test("#"_kj, kj::Maybe("test:test"_kj), "test:test#"_kj); +test("#x"_kj, kj::Maybe("mailto:x@x.com"_kj), "mailto:x@x.com#x"_kj); +test("#x"_kj, kj::Maybe("data:,"_kj), "data:,#x"_kj); +test("#x"_kj, kj::Maybe("about:blank"_kj), "about:blank#x"_kj); +test("#x:y"_kj, kj::Maybe("about:blank"_kj), "about:blank#x:y"_kj); +test("#"_kj, kj::Maybe("test:test?test"_kj), "test:test?test#"_kj); +test("https://@test@test@example:800/"_kj, kj::Maybe("http://doesnotmatter/"_kj), "https://%40test%40test@example:800/"_kj); +test("https://@@@example"_kj, kj::Maybe("http://doesnotmatter/"_kj), "https://%40%40@example/"_kj); +test("http://`{}:`{}@h/`{}?`{}"_kj, kj::Maybe("http://doesnotmatter/"_kj), "http://%60%7B%7D:%60%7B%7D@h/%60%7B%7D?`{}"_kj); +test("http://host/?'"_kj, kj::none, "http://host/?%27"_kj); +test("notspecial://host/?'"_kj, kj::none, "notspecial://host/?'"_kj); +test("/some/path"_kj, kj::Maybe("http://user@example.org/smth"_kj), "http://user@example.org/some/path"_kj); +test(""_kj, kj::Maybe("http://user:pass@example.org:21/smth"_kj), "http://user:pass@example.org:21/smth"_kj); +test("/some/path"_kj, kj::Maybe("http://user:pass@example.org:21/smth"_kj), "http://user:pass@example.org:21/some/path"_kj); +test("i"_kj, 
kj::Maybe("sc:/pa/pa"_kj), "sc:/pa/i"_kj); +test("i"_kj, kj::Maybe("sc://ho/pa"_kj), "sc://ho/i"_kj); +test("i"_kj, kj::Maybe("sc:///pa/pa"_kj), "sc:///pa/i"_kj); +test("../i"_kj, kj::Maybe("sc:/pa/pa"_kj), "sc:/i"_kj); +test("../i"_kj, kj::Maybe("sc://ho/pa"_kj), "sc://ho/i"_kj); +test("../i"_kj, kj::Maybe("sc:///pa/pa"_kj), "sc:///i"_kj); +test("/i"_kj, kj::Maybe("sc:/pa/pa"_kj), "sc:/i"_kj); +test("/i"_kj, kj::Maybe("sc://ho/pa"_kj), "sc://ho/i"_kj); +test("/i"_kj, kj::Maybe("sc:///pa/pa"_kj), "sc:///i"_kj); +test("?i"_kj, kj::Maybe("sc:/pa/pa"_kj), "sc:/pa/pa?i"_kj); +test("?i"_kj, kj::Maybe("sc://ho/pa"_kj), "sc://ho/pa?i"_kj); +test("?i"_kj, kj::Maybe("sc:///pa/pa"_kj), "sc:///pa/pa?i"_kj); +test("#i"_kj, kj::Maybe("sc:sd"_kj), "sc:sd#i"_kj); +test("#i"_kj, kj::Maybe("sc:sd/sd"_kj), "sc:sd/sd#i"_kj); +test("#i"_kj, kj::Maybe("sc:/pa/pa"_kj), "sc:/pa/pa#i"_kj); +test("#i"_kj, kj::Maybe("sc://ho/pa"_kj), "sc://ho/pa#i"_kj); +test("#i"_kj, kj::Maybe("sc:///pa/pa"_kj), "sc:///pa/pa#i"_kj); +test("about:/../"_kj, kj::none, "about:/"_kj); +test("data:/../"_kj, kj::none, "data:/"_kj); +test("javascript:/../"_kj, kj::none, "javascript:/"_kj); +test("mailto:/../"_kj, kj::none, "mailto:/"_kj); +test("sc://ñ.test/"_kj, kj::none, "sc://%C3%B1.test/"_kj); +test("sc://%/"_kj, kj::none, "sc://%/"_kj); +test("x"_kj, kj::Maybe("sc://ñ"_kj), "sc://%C3%B1/x"_kj); +test("sc:\\../"_kj, kj::none, "sc:\\../"_kj); +test("sc::a@example.net"_kj, kj::none, "sc::a@example.net"_kj); +test("wow:%NBD"_kj, kj::none, "wow:%NBD"_kj); +test("wow:%1G"_kj, kj::none, "wow:%1G"_kj); +test("wow:￿"_kj, kj::none, "wow:%EF%BF%BF"_kj); +test("http://example.com/�𐟾�﷐﷏﷯ﷰ￾￿?�𐟾�﷐﷏﷯ﷰ￾￿"_kj, kj::none, "http://example.com/%EF%BF%BD%F0%90%9F%BE%EF%BF%BD%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF?%EF%BF%BD%F0%90%9F%BE%EF%BF%BD%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF"_kj); +test("foo://ho\tst/"_kj, kj::none, "foo://host/"_kj); +test("foo://ho\nst/"_kj, kj::none, "foo://host/"_kj); 
+test("foo://ho\rst/"_kj, kj::none, "foo://host/"_kj); +test("http://ho\tst/"_kj, kj::none, "http://host/"_kj); +test("http://ho\nst/"_kj, kj::none, "http://host/"_kj); +test("http://ho\rst/"_kj, kj::none, "http://host/"_kj); +test("http://!\"$&'()*+,-.;=_`{}~/"_kj, kj::none, "http://!\"$&'()*+,-.;=_`{}~/"_kj); +test("sc:// !\"$%&'()*+,-.;=_`{}~/"_kj, kj::none, "sc://%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~/"_kj); +test("ftp://%e2%98%83"_kj, kj::none, "ftp://xn--n3h/"_kj); +test("https://%e2%98%83"_kj, kj::none, "https://xn--n3h/"_kj); +test("http://127.0.0.1:10100/relative_import.html"_kj, kj::none, "http://127.0.0.1:10100/relative_import.html"_kj); +test("http://facebook.com/?foo=%7B%22abc%22"_kj, kj::none, "http://facebook.com/?foo=%7B%22abc%22"_kj); +test("https://localhost:3000/jqueryui@1.2.3"_kj, kj::none, "https://localhost:3000/jqueryui@1.2.3"_kj); +test("h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg"_kj, kj::none, "http://host:9000/path?query#frag"_kj); +test("?a=b&c=d"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar?a=b&c=d"_kj); +test("??a=b&c=d"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar??a=b&c=d"_kj); +test("http:"_kj, kj::Maybe("http://example.org/foo/bar"_kj), "http://example.org/foo/bar"_kj); +test("sc:"_kj, kj::Maybe("https://example.org/foo/bar"_kj), "sc:"_kj); +test("http://foo.bar/baz?qux#foobar"_kj, kj::none, "http://foo.bar/baz?qux#foo%08bar"_kj); +test("http://foo.bar/baz?qux#foo\"bar"_kj, kj::none, "http://foo.bar/baz?qux#foo%22bar"_kj); +test("http://foo.bar/baz?qux#foobar"_kj, kj::none, "http://foo.bar/baz?qux#foo%3Ebar"_kj); +test("http://foo.bar/baz?qux#foo`bar"_kj, kj::none, "http://foo.bar/baz?qux#foo%60bar"_kj); +test("http://1.2.3.4/"_kj, kj::Maybe("http://other.com/"_kj), "http://1.2.3.4/"_kj); +test("http://1.2.3.4./"_kj, kj::Maybe("http://other.com/"_kj), "http://1.2.3.4/"_kj); 
+test("http://192.168.257"_kj, kj::Maybe("http://other.com/"_kj), "http://192.168.1.1/"_kj); +test("http://192.168.257."_kj, kj::Maybe("http://other.com/"_kj), "http://192.168.1.1/"_kj); +test("http://192.168.257.com"_kj, kj::Maybe("http://other.com/"_kj), "http://192.168.257.com/"_kj); +test("http://256"_kj, kj::Maybe("http://other.com/"_kj), "http://0.0.1.0/"_kj); +test("http://256.com"_kj, kj::Maybe("http://other.com/"_kj), "http://256.com/"_kj); +test("http://999999999"_kj, kj::Maybe("http://other.com/"_kj), "http://59.154.201.255/"_kj); +test("http://999999999."_kj, kj::Maybe("http://other.com/"_kj), "http://59.154.201.255/"_kj); +test("http://999999999.com"_kj, kj::Maybe("http://other.com/"_kj), "http://999999999.com/"_kj); +test("http://10000000000.com"_kj, kj::Maybe("http://other.com/"_kj), "http://10000000000.com/"_kj); +test("http://4294967295"_kj, kj::Maybe("http://other.com/"_kj), "http://255.255.255.255/"_kj); +test("http://0xffffffff"_kj, kj::Maybe("http://other.com/"_kj), "http://255.255.255.255/"_kj); +test("https://0x.0x.0"_kj, kj::none, "https://0.0.0.0/"_kj); +test("file:///C%3A/"_kj, kj::none, "file:///C%3A/"_kj); +test("file:///C%7C/"_kj, kj::none, "file:///C%7C/"_kj); +test("asdf://%43%7C/"_kj, kj::none, "asdf://%43%7C/"_kj); +test("pix/submit.gif"_kj, kj::Maybe("file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html"_kj), "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif"_kj); +test(".."_kj, kj::Maybe("file:///C:/"_kj), "file:///C:/"_kj); +test(".."_kj, kj::Maybe("file:///"_kj), "file:///"_kj); +test("/"_kj, kj::Maybe("file:///C:/a/b"_kj), "file:///C:/"_kj); +test("/"_kj, kj::Maybe("file://h/C:/a/b"_kj), "file://h/C:/"_kj); +test("/"_kj, kj::Maybe("file://h/a/b"_kj), "file://h/"_kj); +test("//d:"_kj, kj::Maybe("file:///C:/a/b"_kj), "file:///d:"_kj); +test("//d:/.."_kj, kj::Maybe("file:///C:/a/b"_kj), "file:///d:/"_kj); +test(".."_kj, kj::Maybe("file:///ab:/"_kj), 
"file:///"_kj); +test(".."_kj, kj::Maybe("file:///1:/"_kj), "file:///"_kj); +test(""_kj, kj::Maybe("file:///test?test#test"_kj), "file:///test?test"_kj); +test("file:"_kj, kj::Maybe("file:///test?test#test"_kj), "file:///test?test"_kj); +test("?x"_kj, kj::Maybe("file:///test?test#test"_kj), "file:///test?x"_kj); +test("file:?x"_kj, kj::Maybe("file:///test?test#test"_kj), "file:///test?x"_kj); +test("#x"_kj, kj::Maybe("file:///test?test#test"_kj), "file:///test?test#x"_kj); +test("file:#x"_kj, kj::Maybe("file:///test?test#test"_kj), "file:///test?test#x"_kj); +test("file:\\\\//"_kj, kj::none, "file:////"_kj); +test("file:\\\\\\\\"_kj, kj::none, "file:////"_kj); +test("file:\\\\\\\\?fox"_kj, kj::none, "file:////?fox"_kj); +test("file:\\\\\\\\#guppy"_kj, kj::none, "file:////#guppy"_kj); +test("file://spider///"_kj, kj::none, "file://spider///"_kj); +test("file:\\\\localhost//"_kj, kj::none, "file:////"_kj); +test("file:///localhost//cat"_kj, kj::none, "file:///localhost//cat"_kj); +test("file://\\/localhost//cat"_kj, kj::none, "file:////localhost//cat"_kj); +test("file://localhost//a//../..//"_kj, kj::none, "file://///"_kj); +test("/////mouse"_kj, kj::Maybe("file:///elephant"_kj), "file://///mouse"_kj); +test("\\//pig"_kj, kj::Maybe("file://lion/"_kj), "file:///pig"_kj); +test("\\/localhost//pig"_kj, kj::Maybe("file://lion/"_kj), "file:////pig"_kj); +test("//localhost//pig"_kj, kj::Maybe("file://lion/"_kj), "file:////pig"_kj); +test("/..//localhost//pig"_kj, kj::Maybe("file://lion/"_kj), "file://lion//localhost//pig"_kj); +test("file://"_kj, kj::Maybe("file://ape/"_kj), "file:///"_kj); +test("/rooibos"_kj, kj::Maybe("file://tea/"_kj), "file://tea/rooibos"_kj); +test("/?chai"_kj, kj::Maybe("file://tea/"_kj), "file://tea/?chai"_kj); +test("C|"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/C:"_kj); +test("C|"_kj, kj::Maybe("file://host/D:/dir1/dir2/file"_kj), "file://host/C:"_kj); +test("C|#"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/C:#"_kj); 
+test("C|?"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/C:?"_kj); +test("C|/"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/C:/"_kj); +test("C|\n/"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/C:/"_kj); +test("C|\\"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/C:/"_kj); +test("C"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/dir/C"_kj); +test("C|a"_kj, kj::Maybe("file://host/dir/file"_kj), "file://host/dir/C|a"_kj); +test("/c:/foo/bar"_kj, kj::Maybe("file:///c:/baz/qux"_kj), "file:///c:/foo/bar"_kj); +test("/c|/foo/bar"_kj, kj::Maybe("file:///c:/baz/qux"_kj), "file:///c:/foo/bar"_kj); +test("file:\\c:\\foo\\bar"_kj, kj::Maybe("file:///c:/baz/qux"_kj), "file:///c:/foo/bar"_kj); +test("/c:/foo/bar"_kj, kj::Maybe("file://host/path"_kj), "file://host/c:/foo/bar"_kj); +test("file://example.net/C:/"_kj, kj::none, "file://example.net/C:/"_kj); +test("file://1.2.3.4/C:/"_kj, kj::none, "file://1.2.3.4/C:/"_kj); +test("file://[1::8]/C:/"_kj, kj::none, "file://[1::8]/C:/"_kj); +test("C|/"_kj, kj::Maybe("file://host/"_kj), "file://host/C:/"_kj); +test("/C:/"_kj, kj::Maybe("file://host/"_kj), "file://host/C:/"_kj); +test("file:C:/"_kj, kj::Maybe("file://host/"_kj), "file://host/C:/"_kj); +test("file:/C:/"_kj, kj::Maybe("file://host/"_kj), "file://host/C:/"_kj); +test("//C:/"_kj, kj::Maybe("file://host/"_kj), "file:///C:/"_kj); +test("file://C:/"_kj, kj::Maybe("file://host/"_kj), "file:///C:/"_kj); +test("///C:/"_kj, kj::Maybe("file://host/"_kj), "file:///C:/"_kj); +test("file:///C:/"_kj, kj::Maybe("file://host/"_kj), "file:///C:/"_kj); +test("file:/C|/"_kj, kj::none, "file:///C:/"_kj); +test("file://C|/"_kj, kj::none, "file:///C:/"_kj); +test("file:"_kj, kj::none, "file:///"_kj); +test("file:?q=v"_kj, kj::none, "file:///?q=v"_kj); +test("file:#frag"_kj, kj::none, "file:///#frag"_kj); +test("file:///Y:"_kj, kj::none, "file:///Y:"_kj); +test("file:///Y:/"_kj, kj::none, "file:///Y:/"_kj); +test("file:///./Y"_kj, kj::none, 
"file:///Y"_kj); +test("file:///./Y:"_kj, kj::none, "file:///Y:"_kj); +test("file:///y:"_kj, kj::none, "file:///y:"_kj); +test("file:///y:/"_kj, kj::none, "file:///y:/"_kj); +test("file:///./y"_kj, kj::none, "file:///y"_kj); +test("file:///./y:"_kj, kj::none, "file:///y:"_kj); +test("file://localhost//a//../..//foo"_kj, kj::none, "file://///foo"_kj); +test("file://localhost////foo"_kj, kj::none, "file://////foo"_kj); +test("file:////foo"_kj, kj::none, "file:////foo"_kj); +test("file:///one/two"_kj, kj::Maybe("file:///"_kj), "file:///one/two"_kj); +test("file:////one/two"_kj, kj::Maybe("file:///"_kj), "file:////one/two"_kj); +test("//one/two"_kj, kj::Maybe("file:///"_kj), "file://one/two"_kj); +test("///one/two"_kj, kj::Maybe("file:///"_kj), "file:///one/two"_kj); +test("////one/two"_kj, kj::Maybe("file:///"_kj), "file:////one/two"_kj); +test("file:///.//"_kj, kj::Maybe("file:////"_kj), "file:////"_kj); +test("file:.//p"_kj, kj::none, "file:////p"_kj); +test("file:/.//p"_kj, kj::none, "file:////p"_kj); +test("http://[1:0::]"_kj, kj::Maybe("http://example.net/"_kj), "http://[1::]/"_kj); +test("sc://ñ"_kj, kj::none, "sc://%C3%B1"_kj); +test("sc://ñ?x"_kj, kj::none, "sc://%C3%B1?x"_kj); +test("sc://ñ#x"_kj, kj::none, "sc://%C3%B1#x"_kj); +test("#x"_kj, kj::Maybe("sc://ñ"_kj), "sc://%C3%B1#x"_kj); +test("?x"_kj, kj::Maybe("sc://ñ"_kj), "sc://%C3%B1?x"_kj); +test("sc://?"_kj, kj::none, "sc://?"_kj); +test("sc://#"_kj, kj::none, "sc://#"_kj); +test("///"_kj, kj::Maybe("sc://x/"_kj), "sc:///"_kj); +test("////"_kj, kj::Maybe("sc://x/"_kj), "sc:////"_kj); +test("////x/"_kj, kj::Maybe("sc://x/"_kj), "sc:////x/"_kj); +test("tftp://foobar.com/someconfig;mode=netascii"_kj, kj::none, "tftp://foobar.com/someconfig;mode=netascii"_kj); +test("telnet://user:pass@foobar.com:23/"_kj, kj::none, "telnet://user:pass@foobar.com:23/"_kj); +test("ut2004://10.10.10.10:7777/Index.ut2"_kj, kj::none, "ut2004://10.10.10.10:7777/Index.ut2"_kj); 
+test("redis://foo:bar@somehost:6379/0?baz=bam&qux=baz"_kj, kj::none, "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz"_kj); +test("rsync://foo@host:911/sup"_kj, kj::none, "rsync://foo@host:911/sup"_kj); +test("git://github.com/foo/bar.git"_kj, kj::none, "git://github.com/foo/bar.git"_kj); +test("irc://myserver.com:6999/channel?passwd"_kj, kj::none, "irc://myserver.com:6999/channel?passwd"_kj); +test("dns://fw.example.org:9999/foo.bar.org?type=TXT"_kj, kj::none, "dns://fw.example.org:9999/foo.bar.org?type=TXT"_kj); +test("ldap://localhost:389/ou=People,o=JNDITutorial"_kj, kj::none, "ldap://localhost:389/ou=People,o=JNDITutorial"_kj); +test("git+https://github.com/foo/bar"_kj, kj::none, "git+https://github.com/foo/bar"_kj); +test("urn:ietf:rfc:2648"_kj, kj::none, "urn:ietf:rfc:2648"_kj); +test("tag:joe@example.org,2001:foo/bar"_kj, kj::none, "tag:joe@example.org,2001:foo/bar"_kj); +test("non-spec:/.//"_kj, kj::none, "non-spec:/.//"_kj); +test("non-spec:/..//"_kj, kj::none, "non-spec:/.//"_kj); +test("non-spec:/a/..//"_kj, kj::none, "non-spec:/.//"_kj); +test("non-spec:/.//path"_kj, kj::none, "non-spec:/.//path"_kj); +test("non-spec:/..//path"_kj, kj::none, "non-spec:/.//path"_kj); +test("non-spec:/a/..//path"_kj, kj::none, "non-spec:/.//path"_kj); +test("/.//path"_kj, kj::Maybe("non-spec:/p"_kj), "non-spec:/.//path"_kj); +test("/..//path"_kj, kj::Maybe("non-spec:/p"_kj), "non-spec:/.//path"_kj); +test("..//path"_kj, kj::Maybe("non-spec:/p"_kj), "non-spec:/.//path"_kj); +test("a/..//path"_kj, kj::Maybe("non-spec:/p"_kj), "non-spec:/.//path"_kj); +test(""_kj, kj::Maybe("non-spec:/..//p"_kj), "non-spec:/.//p"_kj); +test("path"_kj, kj::Maybe("non-spec:/..//p"_kj), "non-spec:/.//path"_kj); +test("../path"_kj, kj::Maybe("non-spec:/.//p"_kj), "non-spec:/path"_kj); +test("non-special://%E2%80%A0/"_kj, kj::none, "non-special://%E2%80%A0/"_kj); +test("non-special://H%4fSt/path"_kj, kj::none, "non-special://H%4fSt/path"_kj); +test("non-special://[1:2:0:0:5:0:0:0]/"_kj, 
kj::none, "non-special://[1:2:0:0:5::]/"_kj); +test("non-special://[1:2:0:0:0:0:0:3]/"_kj, kj::none, "non-special://[1:2::3]/"_kj); +test("non-special://[1:2::3]:80/"_kj, kj::none, "non-special://[1:2::3]:80/"_kj); +test("blob:https://example.com:443/"_kj, kj::none, "blob:https://example.com:443/"_kj); +test("blob:http://example.org:88/"_kj, kj::none, "blob:http://example.org:88/"_kj); +test("blob:d3958f5c-0777-0845-9dcf-2cb28783acaf"_kj, kj::none, "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf"_kj); +test("blob:"_kj, kj::none, "blob:"_kj); +test("blob:blob:"_kj, kj::none, "blob:blob:"_kj); +test("blob:blob:https://example.org/"_kj, kj::none, "blob:blob:https://example.org/"_kj); +test("blob:about:blank"_kj, kj::none, "blob:about:blank"_kj); +test("blob:file://host/path"_kj, kj::none, "blob:file://host/path"_kj); +test("blob:ftp://host/path"_kj, kj::none, "blob:ftp://host/path"_kj); +test("blob:ws://example.org/"_kj, kj::none, "blob:ws://example.org/"_kj); +test("blob:wss://example.org/"_kj, kj::none, "blob:wss://example.org/"_kj); +test("blob:http%3a//example.org/"_kj, kj::none, "blob:http%3a//example.org/"_kj); +test("http://0x7f.0.0.0x7g"_kj, kj::none, "http://0x7f.0.0.0x7g/"_kj); +test("http://0X7F.0.0.0X7G"_kj, kj::none, "http://0x7f.0.0.0x7g/"_kj); +test("http://[0:1:0:1:0:1:0:1]"_kj, kj::none, "http://[0:1:0:1:0:1:0:1]/"_kj); +test("http://[1:0:1:0:1:0:1:0]"_kj, kj::none, "http://[1:0:1:0:1:0:1:0]/"_kj); +test("http://example.org/test?\""_kj, kj::none, "http://example.org/test?%22"_kj); +test("http://example.org/test?#"_kj, kj::none, "http://example.org/test?#"_kj); +test("http://example.org/test?<"_kj, kj::none, "http://example.org/test?%3C"_kj); +test("http://example.org/test?>"_kj, kj::none, "http://example.org/test?%3E"_kj); +test("http://example.org/test?⌣"_kj, kj::none, "http://example.org/test?%E2%8C%A3"_kj); +test("http://example.org/test?%23%23"_kj, kj::none, "http://example.org/test?%23%23"_kj); +test("http://example.org/test?%GH"_kj, kj::none, 
"http://example.org/test?%GH"_kj); +test("http://example.org/test?a#%EF"_kj, kj::none, "http://example.org/test?a#%EF"_kj); +test("http://example.org/test?a#%GH"_kj, kj::none, "http://example.org/test?a#%GH"_kj); +test("test-a-colon-slash.html"_kj, kj::Maybe("a:/"_kj), "a:/test-a-colon-slash.html"_kj); +test("test-a-colon-slash-slash.html"_kj, kj::Maybe("a://"_kj), "a:///test-a-colon-slash-slash.html"_kj); +test("test-a-colon-slash-b.html"_kj, kj::Maybe("a:/b"_kj), "a:/test-a-colon-slash-b.html"_kj); +test("test-a-colon-slash-slash-b.html"_kj, kj::Maybe("a://b"_kj), "a://b/test-a-colon-slash-slash-b.html"_kj); +test("http://example.org/test?a#b\0c"_kj, kj::none, "http://example.org/test?a#b%00c"_kj); +test("non-spec://example.org/test?a#b\0c"_kj, kj::none, "non-spec://example.org/test?a#b%00c"_kj); +test("non-spec:/test?a#b\0c"_kj, kj::none, "non-spec:/test?a#b%00c"_kj); +test("10.0.0.7:8080/foo.html"_kj, kj::Maybe("file:///some/dir/bar.html"_kj), "file:///some/dir/10.0.0.7:8080/foo.html"_kj); +test("a!@$*=/foo.html"_kj, kj::Maybe("file:///some/dir/bar.html"_kj), "file:///some/dir/a!@$*=/foo.html"_kj); +test("a1234567890-+.:foo/bar"_kj, kj::Maybe("http://example.com/dir/file"_kj), "a1234567890-+.:foo/bar"_kj); +test("file://a­b/p"_kj, kj::none, "file://ab/p"_kj); +test("file://a%C2%ADb/p"_kj, kj::none, "file://ab/p"_kj); +test("file://loC𝐀𝐋𝐇𝐨𝐬𝐭/usr/bin"_kj, kj::none, "file:///usr/bin"_kj); +test("#link"_kj, kj::Maybe("https://example.org/##link"_kj), "https://example.org/#link"_kj); +test("non-special:cannot-be-a-base-url-\0~€"_kj, kj::none, "non-special:cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80"_kj); +test("https://www.example.com/path{path.html?query'=query#fragment<fragment"_kj, kj::none, "https://www.example.com/path%7B%7Fpath.html?query%27%7F=query#fragment%3C%7Ffragment"_kj); +test("https://user:pass[@foo/bar"_kj, kj::Maybe("http://example.org"_kj), "https://user:pass%5B%7F@foo/bar"_kj); +test("foo:// !\"$%&'()*+,-.;<=>@[\\]^_`{|}~@host/"_kj, kj::none, 
"foo://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/"_kj); +test("wss:// !\"$%&'()*+,-.;<=>@[]^_`{|}~@host/"_kj, kj::none, "wss://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/"_kj); +test("foo://joe: !\"$%&'()*+,-.:;<=>@[\\]^_`{|}~@host/"_kj, kj::none, "foo://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/"_kj); +test("wss://joe: !\"$%&'()*+,-.:;<=>@[]^_`{|}~@host/"_kj, kj::none, "wss://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/"_kj); +test("foo://!\"$%&'()*+,-.;=_`{}~/"_kj, kj::none, "foo://!\"$%&'()*+,-.;=_`{}~/"_kj); +test("wss://!\"$&'()*+,-.;=_`{}~/"_kj, kj::none, "wss://!\"$&'()*+,-.;=_`{}~/"_kj); +test("foo://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~"_kj, kj::none, "foo://host/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~"_kj); +test("wss://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~"_kj, kj::none, "wss://host/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~"_kj); +test("foo://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~"_kj, kj::none, "foo://host/dir/?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~"_kj); +test("wss://host/dir/? 
!\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~"_kj, kj::none, "wss://host/dir/?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~"_kj); +test("foo://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"_kj, kj::none, "foo://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~"_kj); +test("wss://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"_kj, kj::none, "wss://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~"_kj); +test("abc:rootless"_kj, kj::Maybe("abc://host/path"_kj), "abc:rootless"_kj); +test("abc:rootless"_kj, kj::Maybe("abc:/path"_kj), "abc:rootless"_kj); +test("abc:rootless"_kj, kj::Maybe("abc:path"_kj), "abc:rootless"_kj); +test("abc:/rooted"_kj, kj::Maybe("abc://host/path"_kj), "abc:/rooted"_kj); +test("http://foo.09.."_kj, kj::none, "http://foo.09../"_kj); +test("https://x/\0y"_kj, kj::none, "https://x/%00y"_kj); +test("https://x/?\0y"_kj, kj::none, "https://x/?%00y"_kj); +test("https://x/?#\0y"_kj, kj::none, "https://x/?#%00y"_kj); +test("https://x/￿y"_kj, kj::none, "https://x/%EF%BF%BFy"_kj); +test("https://x/?￿y"_kj, kj::none, "https://x/?%EF%BF%BFy"_kj); +test("https://x/?#￿y"_kj, kj::none, "https://x/?#%EF%BF%BFy"_kj); +test("non-special:\0y"_kj, kj::none, "non-special:%00y"_kj); +test("non-special:x/\0y"_kj, kj::none, "non-special:x/%00y"_kj); +test("non-special:x/?\0y"_kj, kj::none, "non-special:x/?%00y"_kj); +test("non-special:x/?#\0y"_kj, kj::none, "non-special:x/?#%00y"_kj); +test("non-special:￿y"_kj, kj::none, "non-special:%EF%BF%BFy"_kj); +test("non-special:x/￿y"_kj, kj::none, "non-special:x/%EF%BF%BFy"_kj); +test("non-special:x/?￿y"_kj, kj::none, "non-special:x/?%EF%BF%BFy"_kj); +test("non-special:x/?#￿y"_kj, kj::none, "non-special:x/?#%EF%BF%BFy"_kj); +test("https://example.com/\"quoted\""_kj, kj::none, "https://example.com/%22quoted%22"_kj); +test("https://a%C2%ADb/"_kj, kj::none, "https://ab/"_kj); +test("data://example.com:8080/pathname?search#hash"_kj, kj::none, "data://example.com:8080/pathname?search#hash"_kj); +test("data:///test"_kj, kj::none, 
"data:///test"_kj); +test("data://test/a/../b"_kj, kj::none, "data://test/b"_kj); +test("javascript://example.com:8080/pathname?search#hash"_kj, kj::none, "javascript://example.com:8080/pathname?search#hash"_kj); +test("javascript:///test"_kj, kj::none, "javascript:///test"_kj); +test("javascript://test/a/../b"_kj, kj::none, "javascript://test/b"_kj); +test("mailto://example.com:8080/pathname?search#hash"_kj, kj::none, "mailto://example.com:8080/pathname?search#hash"_kj); +test("mailto:///test"_kj, kj::none, "mailto:///test"_kj); +test("mailto://test/a/../b"_kj, kj::none, "mailto://test/b"_kj); +test("intent://example.com:8080/pathname?search#hash"_kj, kj::none, "intent://example.com:8080/pathname?search#hash"_kj); +test("intent:///test"_kj, kj::none, "intent:///test"_kj); +test("intent://test/a/../b"_kj, kj::none, "intent://test/b"_kj); +test("urn://example.com:8080/pathname?search#hash"_kj, kj::none, "urn://example.com:8080/pathname?search#hash"_kj); +test("urn:///test"_kj, kj::none, "urn:///test"_kj); +test("urn://test/a/../b"_kj, kj::none, "urn://test/b"_kj); +test("turn://example.com:8080/pathname?search#hash"_kj, kj::none, "turn://example.com:8080/pathname?search#hash"_kj); +test("turn:///test"_kj, kj::none, "turn:///test"_kj); +test("turn://test/a/../b"_kj, kj::none, "turn://test/b"_kj); +test("stun://example.com:8080/pathname?search#hash"_kj, kj::none, "stun://example.com:8080/pathname?search#hash"_kj); +test("stun:///test"_kj, kj::none, "stun:///test"_kj); +test("stun://test/a/../b"_kj, kj::none, "stun://test/b"_kj); +test("w://x:0"_kj, kj::none, "w://x:0"_kj); +test("west://x:0"_kj, kj::none, "west://x:0"_kj); diff --git a/src/workerd/jsg/url-test.c++ b/src/workerd/jsg/url-test.c++ new file mode 100644 index 00000000000..402122cfefd --- /dev/null +++ b/src/workerd/jsg/url-test.c++ @@ -0,0 +1,79 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+#include "jsg-test.h"
+#include "url.h"
+#include
+
+namespace workerd::jsg::test {
+namespace {
+
+KJ_TEST("Basics") {
+  Url theUrl = nullptr;
+  KJ_IF_SOME(url, Url::tryParse("http://example.org:81"_kj)) {
+    KJ_ASSERT(url.getOrigin() == "http://example.org:81"_kj);
+    KJ_ASSERT(url.getHref() == "http://example.org:81/"_kj);
+    KJ_ASSERT(url.getProtocol() == "http:"_kj);
+    KJ_ASSERT(url.getHostname() == "example.org"_kj);
+    KJ_ASSERT(url.getHost() == "example.org:81"_kj);
+    KJ_ASSERT(url.getPort() == "81"_kj);
+    KJ_ASSERT(url.getPathname() == "/"_kj);
+    KJ_ASSERT(url.getSchemeType() == Url::SchemeType::HTTP);
+    KJ_ASSERT(url.getHostType() == Url::HostType::DEFAULT);
+    KJ_ASSERT(url.getUsername() == ""_kj);
+    KJ_ASSERT(url.getPassword() == ""_kj);
+    KJ_ASSERT(url.getHash() == ""_kj);
+    KJ_ASSERT(url.getSearch() == ""_kj);
+
+    theUrl = url.clone();
+    KJ_ASSERT(theUrl == url);
+    theUrl = kj::mv(url);
+
+    auto res = KJ_ASSERT_NONNULL(theUrl.resolve("abc"_kj));
+    KJ_ASSERT(res.getHref() == "http://example.org:81/abc"_kj);
+
+    // jsg::Urls support KJ_STRINGIFY
+    KJ_ASSERT(kj::str(res) == "http://example.org:81/abc");
+
+    // jsg::Urls are suitable to be used as keys in a hashset, hashmap
+    kj::HashSet urls;
+    urls.insert(res.clone());
+    KJ_ASSERT(urls.contains(res));
+
+    kj::HashMap urlmap;
+    urlmap.insert(res.clone(), 1);
+    KJ_ASSERT(KJ_ASSERT_NONNULL(urlmap.find(res)) == 1);
+  } else {
+    KJ_FAIL_ASSERT("url could not be parsed");
+  }
+
+  KJ_ASSERT(Url::idnToAscii("täst.de"_kj) == "xn--tst-qla.de"_kj);
+  KJ_ASSERT(Url::idnToUnicode("xn--tst-qla.de"_kj) == "täst.de"_kj);
+}
+
+KJ_TEST("Non-special URL") {
+  auto url = KJ_ASSERT_NONNULL(Url::tryParse("abc://123"_kj));
+  KJ_ASSERT(url.getOrigin() == "null"_kj);
+  KJ_ASSERT(url.getProtocol() == "abc:"_kj);
+}
+
+KJ_TEST("Invalid Urls") {
+#include "url-test-corpus-failures.h"
+}
+
+void test(kj::StringPtr input,
+          kj::Maybe base,
+          kj::StringPtr href) {
+  KJ_ASSERT(Url::canParse(input, base));
+  auto url = KJ_ASSERT_NONNULL(Url::tryParse(input, base));
+  KJ_ASSERT(url.getHref() == href);
+}
+
+KJ_TEST("Valid Urls") {
+  KJ_ASSERT_NONNULL(Url::tryParse(""_kj, "http://example.org"_kj));
+#include "url-test-corpus-success.h"
+}
+
+}  // namespace
+}  // namespace workerd::jsg::test
diff --git a/src/workerd/jsg/url.c++ b/src/workerd/jsg/url.c++
new file mode 100644
index 00000000000..d73d3d8bdb3
--- /dev/null
+++ b/src/workerd/jsg/url.c++
@@ -0,0 +1,278 @@
+#include "url.h"
+#include
+
+extern "C" {
+  #include
+}
+
+#include
+
+namespace workerd::jsg {
+
+namespace {
+// kj::Disposer that releases an ada_url handle via ada_free().
+class AdaUrlDisposer : public kj::Disposer {
+public:
+  static const AdaUrlDisposer INSTANCE;
+
+protected:
+  void disposeImpl(void* pointer) const override {
+    ada_free(pointer);
+  }
+};
+const AdaUrlDisposer AdaUrlDisposer::INSTANCE;
+
+// kj::ArrayDisposer that hands the buffer of an ada_owned_string back to
+// ada_free_owned_string().
+class AdaOwnedStringDisposer : public kj::ArrayDisposer {
+public:
+  static const AdaOwnedStringDisposer INSTANCE;
+
+protected:
+  void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount,
+                   size_t capacity, void (*destroyElement)(void*)) const override {
+    ada_owned_string data = {
+      static_cast(firstElement),
+      elementCount };
+    ada_free_owned_string(data);
+  }
+};
+const AdaOwnedStringDisposer AdaOwnedStringDisposer::INSTANCE;
+
+// Takes ownership of a raw ada_url handle; it is released through AdaUrlDisposer.
+kj::Own wrap(ada_url url) {
+  return kj::Own(url, AdaUrlDisposer::INSTANCE);
+}
+
+// Returns the raw ada_url handle held by `inner`, with constness cast away.
+// KJ_DASSERT checks that the handle is non-null.
+ada_url get(const kj::Own& inner) {
+  const void* value = inner.get();
+  KJ_DASSERT(value != nullptr);
+  return const_cast(value);
+}
+
+}  // namespace
+
+Url::Url(kj::Own inner) : inner(kj::mv(inner)) {}
+
+Url::Url(Url&& other) : inner(kj::mv(other.inner)) {}
+
+Url& Url::operator=(Url&& other) {
+  inner = kj::mv(other.inner);
+  return *this;
+}
+
+// Two Urls compare equal when their serialized hrefs are equal.
+bool Url::operator==(const Url& other) const {
+  return getHref() == other.getHref();
+}
+
+bool Url::equal(const Url& other, EquivalenceOption option) const {
+  if (option == EquivalenceOption::DEFAULT) {
+    return *this == other;
+  }
+
+  // If we are ignoring fragments, we'll compare each component separately:
+  return other.getProtocol() == getProtocol() &&
+         other.getHost() == getHost() &&
+         other.getUsername() == getUsername() &&
+         other.getPassword() == getPassword() &&
+         other.getPathname() == getPathname() &&
+         other.getSearch() == getSearch();
+}
+
+bool Url::canParse(kj::StringPtr input, kj::Maybe base) {
+  return canParse(kj::ArrayPtr(input), base);
+}
+
+bool Url::canParse(kj::ArrayPtr input,
+                   kj::Maybe> base) {
+  KJ_IF_SOME(b, base) {
+    return ada_can_parse_with_base(input.begin(), input.size(),
+                                   b.begin(), b.size());
+  }
+  return ada_can_parse(input.begin(), input.size());
+}
+
+kj::Maybe Url::tryParse(kj::StringPtr input,
+                        kj::Maybe base) {
+  return tryParse(kj::ArrayPtr(input), base);
+}
+
+// Parses `input`, relative to `base` when one is given. Returns kj::none when
+// ada reports that the result is not a valid URL.
+kj::Maybe Url::tryParse(kj::ArrayPtr input,
+                        kj::Maybe> base) {
+  ada_url result = nullptr;
+  KJ_IF_SOME(b, base) {
+    result = ada_parse_with_base(input.begin(), input.size(),
+                                 b.begin(), b.size());
+  } else {
+    result = ada_parse(input.begin(), input.size());
+  }
+  if (!ada_is_valid(result)) {
+    // The ada C API allocates the result object even when parsing fails, so it
+    // must be released here; returning without freeing it would leak.
+    ada_free(result);
+    return kj::none;
+  }
+  return Url(wrap(result));
+}
+
+// Parses `input` using this Url's href as the base.
+kj::Maybe Url::resolve(kj::ArrayPtr input) {
+  return tryParse(input, getHref());
+}
+
+// The getters below wrap the ada_string returned by the matching ada_get_*()
+// call in a non-owning kj::ArrayPtr; nothing is copied.
+kj::ArrayPtr Url::getHref() const {
+  ada_string href = ada_get_href(get(inner));
+  return kj::ArrayPtr(href.data, href.length);
+}
+
+kj::ArrayPtr Url::getUsername() const {
+  ada_string username = ada_get_username(get(inner));
+  return kj::ArrayPtr(username.data, username.length);
+}
+
+kj::ArrayPtr Url::getPassword() const {
+  ada_string password = ada_get_password(get(inner));
+  return kj::ArrayPtr(password.data, password.length);
+}
+
+kj::ArrayPtr Url::getPort() const {
+  ada_string port = ada_get_port(get(inner));
+  return kj::ArrayPtr(port.data, port.length);
+}
+
+kj::ArrayPtr Url::getHash() const {
+  ada_string hash = ada_get_hash(get(inner));
+  return kj::ArrayPtr(hash.data, hash.length);
+}
+
+kj::ArrayPtr Url::getHost() const {  // non-owning view over the ada_string result
+  ada_string host = ada_get_host(get(inner));
+  return kj::ArrayPtr(host.data, host.length);
+}
+
+kj::ArrayPtr Url::getHostname() const {
+  ada_string hostname = ada_get_hostname(get(inner));
+  return kj::ArrayPtr(hostname.data, hostname.length);
+}
+
+kj::ArrayPtr Url::getPathname() const {
+  ada_string path = ada_get_pathname(get(inner));
+  return kj::ArrayPtr(path.data, path.length);
+}
+
+kj::ArrayPtr Url::getSearch() const {
+  ada_string search = ada_get_search(get(inner));
+  return kj::ArrayPtr(search.data, search.length);
+}
+
+kj::ArrayPtr Url::getProtocol() const {
+  ada_string protocol = ada_get_protocol(get(inner));
+  return kj::ArrayPtr(protocol.data, protocol.length);
+}
+
+kj::Array Url::getOrigin() const {  // returns an owning array, unlike the other getters
+  ada_owned_string result = ada_get_origin(get(inner));
+  return kj::Array(
+      const_cast(result.data),
+      result.length,
+      AdaOwnedStringDisposer::INSTANCE);  // disposer calls ada_free_owned_string()
+}
+
+bool Url::setHref(kj::ArrayPtr value) {  // setters forward ada's bool result
+  return ada_set_href(get(inner), value.begin(), value.size());
+}
+
+bool Url::setHost(kj::ArrayPtr value) {
+  return ada_set_host(get(inner), value.begin(), value.size());
+}
+
+bool Url::setHostname(kj::ArrayPtr value) {
+  return ada_set_hostname(get(inner), value.begin(), value.size());
+}
+
+bool Url::setProtocol(kj::ArrayPtr value) {
+  return ada_set_protocol(get(inner), value.begin(), value.size());
+}
+
+bool Url::setUsername(kj::ArrayPtr value) {
+  return ada_set_username(get(inner), value.begin(), value.size());
+}
+
+bool Url::setPassword(kj::ArrayPtr value) {
+  return ada_set_password(get(inner), value.begin(), value.size());
+}
+
+bool Url::setPort(kj::Maybe> value) {
+  KJ_IF_SOME(v, value) {
+    return ada_set_port(get(inner), v.begin(), v.size());
+  }
+  ada_clear_port(get(inner));  // no value supplied: clear the port instead
+  return true;  // clearing is always reported as success
+}
+
+bool Url::setPathname(kj::ArrayPtr value) {
+  return ada_set_pathname(get(inner), value.begin(), value.size());
+}
+
+void Url::setSearch(kj::Maybe> value) {
+  KJ_IF_SOME(v, value) {
+    return ada_set_search(get(inner), v.begin(), v.size());
+  }
+  ada_clear_search(get(inner));  // no value supplied: clear the query instead
+}
+
+void Url::setHash(kj::Maybe> value) {
+  KJ_IF_SOME(v, value) {
+    return ada_set_hash(get(inner), v.begin(), v.size());
+  }
+  ada_clear_hash(get(inner));  // no value supplied: clear the fragment instead
+}
+
+Url::SchemeType Url::getSchemeType() const {
+  uint8_t value = ada_get_scheme_type(const_cast(get(inner)));
+  KJ_REQUIRE(value <= static_cast(SchemeType::FILE));  // FILE is the largest SchemeType value
+  return static_cast(value);
+}
+
+Url::HostType Url::getHostType() const {
+  uint8_t value = ada_get_host_type(const_cast(get(inner)));
+  KJ_REQUIRE(value <= static_cast(HostType::IPV6));  // IPV6 is the largest HostType value
+  return static_cast(value);
+}
+
+Url Url::clone(EquivalenceOption option) {
+  ada_url copy = ada_copy(get(inner));
+  if (option == EquivalenceOption::IGNORE_FRAGMENTS) {
+    ada_clear_hash(copy);  // strip the fragment from the copy only
+  }
+  return Url(kj::Own(copy, AdaUrlDisposer::INSTANCE));
+}
+
+kj::Array Url::idnToUnicode(kj::ArrayPtr value) {
+  ada_owned_string result = ada_idna_to_unicode(value.begin(), value.size());
+  return kj::Array(
+      result.data,
+      result.length,
+      AdaOwnedStringDisposer::INSTANCE);  // disposer calls ada_free_owned_string()
+}
+
+kj::Array Url::idnToAscii(kj::ArrayPtr value) {
+  ada_owned_string result = ada_idna_to_ascii(value.begin(), value.size());
+  return kj::Array(
+      result.data,
+      result.length,
+      AdaOwnedStringDisposer::INSTANCE);  // disposer calls ada_free_owned_string()
+}
+
+kj::uint Url::hashCode() const {
+  return kj::hashCode(getHref());  // hashes the href, the same value operator== compares
+}
+
+}  // namespace workerd::jsg
diff --git a/src/workerd/jsg/url.h b/src/workerd/jsg/url.h
new file mode 100644
index 00000000000..edf889eb2ca
--- /dev/null
+++ b/src/workerd/jsg/url.h
@@ -0,0 +1,112 @@
+#pragma once
+#include
+#include
+
+using ada_url = void*;  // untyped handle passed to the ada_*() C functions
+
+namespace workerd::jsg {
+
+// A WHATWG-compliant URL implementation provided by ada-url.
+class Url final {
+public:
+  // Keep in sync with ada::scheme::type
+  enum class SchemeType {
+    HTTP = 0,
+    NOT_SPECIAL = 1,
+    HTTPS = 2,
+    WS = 3,
+    FTP = 4,
+    WSS = 5,
+    FILE = 6
+  };
+
+  // Keep in sync with ada::url_host_type
+  enum class HostType {
+    DEFAULT = 0,
+    IPV4 = 1,
+    IPV6 = 2,
+  };
+
+  Url(decltype(nullptr)) {}  // leaves `inner` empty
+  Url(Url&& other);
+  KJ_DISALLOW_COPY(Url);
+
+  Url& operator=(Url&& other);
+  bool operator==(const Url& other) const KJ_WARN_UNUSED_RESULT;
+
+  enum class EquivalenceOption {
+    DEFAULT,  // compare / copy the URL as-is
+    IGNORE_FRAGMENTS,  // disregard the fragment (hash) component
+  };
+
+  bool equal(const Url& other, EquivalenceOption option = EquivalenceOption::DEFAULT) const
+      KJ_WARN_UNUSED_RESULT;
+
+  // Returns true if the given input can be successfully parsed as a URL. This is generally
+  // more performant than using tryParse and checking for a kj::none result if all you want
+  // to do is verify that the input is parseable. If you actually want to parse and use the
+  // result, use tryParse instead.
+  static bool canParse(kj::ArrayPtr input,
+                       kj::Maybe> base = kj::none)
+                       KJ_WARN_UNUSED_RESULT;
+  static bool canParse(kj::StringPtr input,
+                       kj::Maybe base = kj::none)
+                       KJ_WARN_UNUSED_RESULT;
+
+  static kj::Maybe tryParse(kj::ArrayPtr input,
+                            kj::Maybe> base = kj::none)
+                            KJ_WARN_UNUSED_RESULT;  // kj::none when the input is not a valid URL
+  static kj::Maybe tryParse(kj::StringPtr input,
+                            kj::Maybe base = kj::none)
+                            KJ_WARN_UNUSED_RESULT;
+
+  kj::Array getOrigin() const KJ_WARN_UNUSED_RESULT;  // owning array, unlike the getters below
+  kj::ArrayPtr getProtocol() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getHref() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getPathname() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getUsername() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getPassword() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getPort() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getHash() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getHost() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getHostname() const KJ_LIFETIMEBOUND;
+  kj::ArrayPtr getSearch() const KJ_LIFETIMEBOUND;
+
+  bool setHref(kj::ArrayPtr value);
+  bool setHost(kj::ArrayPtr value);
+  bool setHostname(kj::ArrayPtr value);
+  bool setProtocol(kj::ArrayPtr value);
+  bool setUsername(kj::ArrayPtr value);
+  bool setPassword(kj::ArrayPtr value);
+  bool setPort(kj::Maybe> value);  // kj::none clears the port
+  bool setPathname(kj::ArrayPtr value);
+  void setSearch(kj::Maybe> value);  // kj::none clears the query
+  void setHash(kj::Maybe> value);  // kj::none clears the fragment
+
+  kj::uint hashCode() const;  // hash of the href
+
+  kj::Maybe resolve(kj::ArrayPtr input) KJ_WARN_UNUSED_RESULT;  // parses input with this href as base
+
+  // Copies this Url. If the option is set to EquivalenceOption::IGNORE_FRAGMENTS, the
+  // copied Url will clear any fragment/hash that exists.
+  Url clone(EquivalenceOption option = EquivalenceOption::DEFAULT) KJ_WARN_UNUSED_RESULT;
+
+  HostType getHostType() const;
+  SchemeType getSchemeType() const;
+
+  // Convert an ASCII hostname to Unicode.
+  static kj::Array idnToUnicode(kj::ArrayPtr value) KJ_WARN_UNUSED_RESULT;
+
+  // Convert a Unicode hostname to ASCII.
+  static kj::Array idnToAscii(kj::ArrayPtr value) KJ_WARN_UNUSED_RESULT;
+
+private:
+  Url(kj::Own inner);  // takes ownership of the wrapped handle
+  kj::Own inner;
+};
+
+inline kj::String KJ_STRINGIFY(const Url& url) {  // a Url stringifies to its href
+  return kj::str(url.getHref());
+}
+
+}  // namespace workerd::jsg