Skip to content

Commit

Permalink
Add ada-url dependency, initial impl of jsg::Url
Browse files Browse the repository at this point in the history
This will serve as the new underlying implementation of the
url-standard.{h|c++} class while also supporting handling of
module import specifiers as URLs.
  • Loading branch information
jasnell committed Oct 5, 2023
1 parent e51f3be commit 5389d0f
Show file tree
Hide file tree
Showing 8 changed files with 397 additions and 0 deletions.
10 changes: 10 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ http_archive(
urls = ["https://github.com/google/brotli/tarball/ec107cf015139c791f79afac0f96c3a2c45e157f"],
)

# ada URL parser, fetched as the upstream single-header release bundle (v2.6.10).
http_archive(
    name = "ada-url",
    url = "https://github.com/ada-url/ada/releases/download/v2.6.10/singleheader.zip",
    sha256 = "850f5dbe0aa606a1c2f0aaa7feec3c5da6b1e09fb5e5dab9b5554469c7795ef4",
    type = "zip",
    # The archive ships no Bazel files of its own, so the build rules come from this repo.
    build_file = "//:build/BUILD.ada-url",
    # No local patches are applied at the moment; the strip level is kept for
    # any patch added to the list below.
    patch_args = ["-p1"],
    patches = [],
)

# ========================================================================================
# Dawn
#
Expand Down
13 changes: 13 additions & 0 deletions build/BUILD.ada-url
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

# Builds the amalgamated ada sources (ada.cpp plus the C++ and C API headers).
cc_library(
    name = "ada-url",
    hdrs = [
        "ada.h",
        "ada_c.h",
    ],
    srcs = ["ada.cpp"],
    include_prefix = ".",
    # Third-party code: silence all compiler warnings for this target.
    copts = ["-w"],
    # NOTE(review): `defines` is propagated to every dependent target and forces
    # ADA_SSE2 regardless of the target CPU -- confirm this is intended for
    # non-x86 builds.
    defines = [
        "ADA_SSE2=1",
    ],
    alwayslink = 1,
    visibility = ["//visibility:public"],
)
2 changes: 2 additions & 0 deletions compile_flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
-nostdinc
-Ibazel-bin/external/com_googlesource_chromium_base_trace_event_common/_virtual_includes/trace_event_common
-Ibazel-bin/external/dawn/include
-Ibazel-bin/external/ada-url/_virtual_includes/ada-url/
-Ibazel-bin/external/com_cloudflare_lol_html/_virtual_includes/lolhtml
-Iexternal/com_google_benchmark/include/
-Iexternal/dawn/include
-Iexternal/ada-url/
-Isrc
-isystem/usr/include
-isystem/usr/include/x86_64-linux-gnu
Expand Down
12 changes: 12 additions & 0 deletions src/workerd/jsg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ wd_cc_library(
":exception",
":modules_capnp",
":observer",
":url",
"//src/workerd/util",
"//src/workerd/util:sentry",
"//src/workerd/util:thread-scopes",
Expand All @@ -34,6 +35,17 @@ wd_cc_library(
],
)

# jsg::Url: a thin KJ-flavoured wrapper around the ada URL parser.
wd_cc_library(
    name = "url",
    hdrs = ["url.h"],
    srcs = ["url.c++"],
    deps = [
        "@capnp-cpp//src/kj",
        "@ada-url",
    ],
    visibility = ["//visibility:public"],
)

wd_cc_library(
name = "exception",
srcs = ["exception.c++"],
Expand Down
1 change: 1 addition & 0 deletions src/workerd/jsg/jsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -2333,3 +2333,4 @@ inline v8::Local<v8::Context> JsContext<T>::getHandle(Lock& js) {
#include "function.h"
#include "iterator.h"
#include "jsvalue.h"
#include "url.h"
49 changes: 49 additions & 0 deletions src/workerd/jsg/url-test.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (c) 2017-2022 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

#include "jsg-test.h"
#include "url.h"

namespace workerd::jsg::test {
namespace {

// Exercises parsing of a simple http URL, every component getter, clone/move
// semantics, relative resolution, and the static IDNA conversion helpers.
KJ_TEST("Basics") {
// Starts out empty; it is assigned below once a URL has been parsed.
Url theUrl = nullptr;
KJ_IF_SOME(url, Url::tryParse("http://example.org:81"_kj)) {
// The origin carries no trailing slash, while the serialized href gains the
// root path "/".
KJ_ASSERT(url.getOrigin() == "http://example.org:81"_kj);
KJ_ASSERT(url.getHref() == "http://example.org:81/"_kj);
KJ_ASSERT(url.getProtocol() == "http:"_kj);
KJ_ASSERT(url.getHostname() == "example.org"_kj);
KJ_ASSERT(url.getHost() == "example.org:81"_kj);
KJ_ASSERT(url.getPort() == "81"_kj);
KJ_ASSERT(url.getPathname() == "/"_kj);
KJ_ASSERT(url.getSchemeType() == Url::SchemeType::HTTP);
KJ_ASSERT(url.getHostType() == Url::HostType::DEFAULT);
// Components that are absent from the input are reported as empty strings.
KJ_ASSERT(url.getUsername() == ""_kj);
KJ_ASSERT(url.getPassword() == ""_kj);
KJ_ASSERT(url.getHash() == ""_kj);
KJ_ASSERT(url.getSearch() == ""_kj);

// A clone must compare equal to the URL it was copied from.
theUrl = url.clone();
KJ_ASSERT(theUrl == url);
// Move assignment: `url` is moved-from afterwards and is not used again.
theUrl = kj::mv(url);

// A relative reference is resolved against the URL's own href.
auto res = KJ_ASSERT_NONNULL(theUrl.resolve("abc"_kj));
KJ_ASSERT(res.getHref() == "http://example.org:81/abc"_kj);
} else {
KJ_FAIL_ASSERT("url could not be parsed");
}

// IDNA round trip between the Unicode and punycode forms of a hostname.
KJ_ASSERT(Url::toAscii("täst.de"_kj) == "xn--tst-qla.de"_kj);
KJ_ASSERT(Url::toUnicode("xn--tst-qla.de"_kj) == "täst.de"_kj);
}

// A URL with a custom ("abc:") scheme must still parse; its origin is expected
// to serialize as the literal string "null".
KJ_TEST("Non-special URL") {
auto url = KJ_ASSERT_NONNULL(Url::tryParse("abc://123"_kj));
KJ_ASSERT(url.getOrigin() == "null"_kj);
KJ_ASSERT(url.getProtocol() == "abc:"_kj);
}

} // namespace
} // namespace workerd::jsg::test
223 changes: 223 additions & 0 deletions src/workerd/jsg/url.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
#include "url.h"

extern "C" {
#include <ada_c.h>
}

#include <kj/debug.h>

namespace workerd::jsg {

namespace {
// kj::Disposer that releases an ada_url handle by passing it to ada_free().
// This is what lets a kj::Own<void> own a handle produced by the ada C API.
class AdaUrlDisposer final : public kj::Disposer {
public:
  // The one shared instance; the class carries no state of its own.
  static const AdaUrlDisposer INSTANCE;

protected:
  void disposeImpl(void* pointer) const override { ada_free(pointer); }
};
const AdaUrlDisposer AdaUrlDisposer::INSTANCE;

// kj::ArrayDisposer that releases an ada_owned_string's buffer through
// ada_free_owned_string(). It allows a kj::Array<const char> to take ownership
// of strings returned by the ada C API.
class AdaOwnedStringDisposer final : public kj::ArrayDisposer {
public:
  // The one shared instance; the class carries no state of its own.
  static const AdaOwnedStringDisposer INSTANCE;

protected:
  // Marked `override` (like AdaUrlDisposer::disposeImpl) so that a signature
  // mismatch with kj::ArrayDisposer is a compile error rather than a silently
  // unrelated function.
  //
  // Only firstElement and elementCount are used: they are repacked into the
  // ada_owned_string that ada_free_owned_string() expects. elementSize,
  // capacity and destroyElement are ignored.
  void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount,
                   size_t capacity, void (*destroyElement)(void*)) const override {
    ada_owned_string data = {
      static_cast<const char*>(firstElement),
      elementCount };
    ada_free_owned_string(data);
  }
};
const AdaOwnedStringDisposer AdaOwnedStringDisposer::INSTANCE;

// Hands an ada_url handle to a kj::Own<void> whose disposer is
// AdaUrlDisposer::INSTANCE, so the handle is freed when the Own is dropped.
kj::Own<void> wrap(ada_url url) {
  kj::Own<void> owned(url, AdaUrlDisposer::INSTANCE);
  return owned;
}

} // namespace

// Adopts an already-owned handle as this Url's underlying state.
Url::Url(kj::Own<void> inner)
    : inner(kj::mv(inner)) {}

// Move construction: takes over `other`'s handle.
Url::Url(Url&& other)
    : inner(kj::mv(other.inner)) {}

// Move assignment: replaces this Url's handle with the one held by `other`.
Url& Url::operator=(Url&& other) {
  this->inner = kj::mv(other.inner);
  return *this;
}

// Two Urls are equal exactly when their serialized hrefs are equal.
bool Url::operator==(const Url& other) const {
  const auto mine = getHref();
  const auto theirs = other.getHref();
  return mine == theirs;
}

// Asks ada whether `input` can be parsed as a URL, without building a Url.
// When `base` is supplied, the check is made with that base via
// ada_can_parse_with_base(); otherwise ada_can_parse() is used.
bool Url::canParse(kj::StringPtr input, kj::Maybe<kj::StringPtr> base) {
  const char* const text = input.begin();
  const size_t textLength = input.size();

  KJ_IF_SOME(baseUrl, base) {
    return ada_can_parse_with_base(text, textLength, baseUrl.begin(), baseUrl.size());
  }
  return ada_can_parse(text, textLength);
}

// Parses `input` as a URL, optionally against `base`.
//
// Returns the parsed Url, or kj::none when ada reports the parse result as
// invalid.
kj::Maybe<Url> Url::tryParse(kj::ArrayPtr<const char> input,
                             kj::Maybe<kj::ArrayPtr<const char>> base) {
  ada_url result = nullptr;
  KJ_IF_SOME(b, base) {
    result = ada_parse_with_base(input.begin(), input.size(),
                                 b.begin(), b.size());
  } else {
    result = ada_parse(input.begin(), input.size());
  }

  // Take ownership before checking validity. The ada parse functions hand back
  // a handle that must be released with ada_free() even when the parse failed;
  // returning early without wrapping it would leak the handle on every
  // invalid input.
  kj::Own<void> owned = wrap(result);
  if (!ada_is_valid(result)) return kj::none;
  return Url(kj::mv(owned));
}

// Parses `input` using this URL's current href as the base.
// Returns kj::none when tryParse() rejects the combination.
kj::Maybe<Url> Url::resolve(kj::ArrayPtr<const char> input) {
  const kj::ArrayPtr<const char> base = getHref();
  return tryParse(input, base);
}

// Returns the serialized URL as a non-owning view.
// NOTE(review): the view presumably points into storage owned by the
// underlying ada_url, so it should not outlive or span a mutation of this
// Url -- confirm against ada_c.h.
kj::ArrayPtr<const char> Url::getHref() const {
  // The ada getters take a non-const handle, hence the const_cast.
  const ada_string view = ada_get_href(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the username component as a non-owning view (see the lifetime
// caveat that applies to all ada_string-backed getters: presumably valid only
// while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getUsername() const {
  const ada_string view = ada_get_username(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the password component as a non-owning view (presumably valid only
// while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getPassword() const {
  const ada_string view = ada_get_password(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the port component, as text, in a non-owning view (presumably valid
// only while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getPort() const {
  const ada_string view = ada_get_port(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the hash (fragment) component as a non-owning view (presumably
// valid only while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getHash() const {
  const ada_string view = ada_get_hash(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the host component as reported by ada_get_host(), as a non-owning
// view (presumably valid only while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getHost() const {
  const ada_string view = ada_get_host(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the hostname component as reported by ada_get_hostname(), as a
// non-owning view (presumably valid only while this Url is alive and
// unmodified).
kj::ArrayPtr<const char> Url::getHostname() const {
  const ada_string hostname = ada_get_hostname(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(hostname.data, hostname.length);
}

// Returns the pathname component as a non-owning view (presumably valid only
// while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getPathname() const {
  const ada_string view = ada_get_pathname(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the search (query) component as a non-owning view (presumably valid
// only while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getSearch() const {
  const ada_string view = ada_get_search(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the protocol (scheme) component as a non-owning view (presumably
// valid only while this Url is alive and unmodified).
kj::ArrayPtr<const char> Url::getProtocol() const {
  const ada_string view = ada_get_protocol(const_cast<void*>(inner.get()));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the URL's origin as an owned array. Unlike the other getters, ada
// returns an ada_owned_string here, so the buffer is handed to a kj::Array
// that frees it through AdaOwnedStringDisposer.
kj::Array<const char> Url::getOrigin() const {
  const ada_owned_string origin = ada_get_origin(const_cast<void*>(inner.get()));
  return kj::Array<const char>(origin.data, origin.length,
                               AdaOwnedStringDisposer::INSTANCE);
}

// Replaces the whole URL with `value`; returns the boolean reported by
// ada_set_href().
bool Url::setHref(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_href(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the host component; returns the boolean reported by ada_set_host().
bool Url::setHost(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_host(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the hostname component; returns the boolean reported by
// ada_set_hostname().
bool Url::setHostname(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_hostname(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the protocol (scheme) component; returns the boolean reported by
// ada_set_protocol().
bool Url::setProtocol(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_protocol(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the username component; returns the boolean reported by
// ada_set_username().
bool Url::setUsername(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_username(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the password component; returns the boolean reported by
// ada_set_password().
bool Url::setPassword(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_password(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the port when a value is given and returns the boolean reported by
// ada_set_port(). With kj::none the port is cleared instead, and the call
// always returns true.
bool Url::setPort(kj::Maybe<kj::ArrayPtr<const char>> value) {
  KJ_IF_SOME(port, value) {
    return ada_set_port(inner.get(), port.begin(), port.size());
  }

  // No value supplied: drop the port entirely.
  ada_clear_port(inner.get());
  return true;
}

// Sets the pathname component; returns the boolean reported by
// ada_set_pathname().
bool Url::setPathname(kj::ArrayPtr<const char> value) {
  const bool accepted = ada_set_pathname(inner.get(), value.begin(), value.size());
  return accepted;
}

// Sets the search (query) component when a value is given; with kj::none the
// component is cleared instead.
void Url::setSearch(kj::Maybe<kj::ArrayPtr<const char>> value) {
  KJ_IF_SOME(search, value) {
    ada_set_search(inner.get(), search.begin(), search.size());
  } else {
    ada_clear_search(inner.get());
  }
}

// Sets the hash (fragment) component when a value is given; with kj::none the
// component is cleared instead.
void Url::setHash(kj::Maybe<kj::ArrayPtr<const char>> value) {
  KJ_IF_SOME(hash, value) {
    ada_set_hash(inner.get(), hash.begin(), hash.size());
  } else {
    ada_clear_hash(inner.get());
  }
}

// Returns the scheme classification that ada reports for this URL, converted
// to Url::SchemeType.
// NOTE(review): the cast assumes SchemeType's enumerators match ada's numeric
// scheme-type values, with FILE being the largest -- the enum is declared in
// url.h; confirm there.
Url::SchemeType Url::getSchemeType() const {
uint8_t value = ada_get_scheme_type(const_cast<void*>(inner.get()));
// Guard the cast: anything above SchemeType::FILE is rejected.
KJ_REQUIRE(value <= static_cast<uint8_t>(SchemeType::FILE));
return static_cast<SchemeType>(value);
}

// Returns the host classification that ada reports for this URL, converted to
// Url::HostType.
// NOTE(review): the cast assumes HostType's enumerators match ada's numeric
// host-type values, with IPV6 being the largest -- the enum is declared in
// url.h; confirm there.
Url::HostType Url::getHostType() const {
uint8_t value = ada_get_host_type(const_cast<void*>(inner.get()));
// Guard the cast: anything above HostType::IPV6 is rejected.
KJ_REQUIRE(value <= static_cast<uint8_t>(HostType::IPV6));
return static_cast<HostType>(value);
}

// Produces an independent Url backed by its own ada_copy() of the handle; the
// copy is released through AdaUrlDisposer like any other handle.
Url Url::clone() {
  kj::Own<void> duplicate(ada_copy(inner.get()), AdaUrlDisposer::INSTANCE);
  return Url(kj::mv(duplicate));
}

// Converts `value` with ada_idna_to_unicode() and returns the result as an
// owned array whose buffer is freed through AdaOwnedStringDisposer.
kj::Array<const char> Url::toUnicode(kj::ArrayPtr<const char> value) {
  const ada_owned_string converted = ada_idna_to_unicode(value.begin(), value.size());
  return kj::Array<const char>(converted.data, converted.length,
                               AdaOwnedStringDisposer::INSTANCE);
}

// Converts `value` with ada_idna_to_ascii() and returns the result as an
// owned array whose buffer is freed through AdaOwnedStringDisposer.
kj::Array<const char> Url::toAscii(kj::ArrayPtr<const char> value) {
  const ada_owned_string converted = ada_idna_to_ascii(value.begin(), value.size());
  return kj::Array<const char>(converted.data, converted.length,
                               AdaOwnedStringDisposer::INSTANCE);
}

} // namespace workerd::jsg
Loading

0 comments on commit 5389d0f

Please sign in to comment.