Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deduplicate multiple isHexDigit impls and optimize #2876

Merged
merged 3 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/workerd/api/util.c++
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "simdutf.h"

#include <workerd/util/mimetype.h>
#include <workerd/util/strings.h>

#include <kj/encoding.h>

Expand Down Expand Up @@ -216,7 +217,6 @@ kj::String redactUrl(kj::StringPtr url) {
bool isUpper = ('A' <= c && c <= 'Z');
bool isLower = ('a' <= c && c <= 'z');
bool isDigit = ('0' <= c && c <= '9');
bool isHexDigit = (isDigit || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'));
bool isSep = (c == '+' || c == '-' || c == '_');
// These extra characters are used in the regular and url-safe versions of
// base64, but might also be used for GUID-style separators in hex ids.
Expand All @@ -225,10 +225,10 @@ kj::String redactUrl(kj::StringPtr url) {
// character.

if (isUpper || isLower || isDigit || isSep) {
if (isHexDigit) {
if (isHexDigit(c)) {
hexDigitCount++;
}
if (!isHexDigit && !isSep) {
if (!isHexDigit(c) && !isSep) {
jasnell marked this conversation as resolved.
Show resolved Hide resolved
sawNonHexChar = true;
}
if (isUpper) {
Expand Down
5 changes: 0 additions & 5 deletions src/workerd/api/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,6 @@ kj::String toLower(kj::String&& str);
// Mutate `str` with all alphabetic ASCII characters uppercased. Returns `str`.
kj::String toUpper(kj::String&& str);

inline bool isHexDigit(uint32_t c) {
  // Returns true only when `c` is the ASCII code of a hexadecimal digit:
  // one of 0-9, a-f or A-F. Every other value (including anything above 0x7f) is rejected.
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}

// Parse `rawText` as application/x-www-form-urlencoded name/value pairs and store in `query`. If
// `skipLeadingQuestionMark` is true, any initial '?' will be ignored. Otherwise, it will be
// interpreted as part of the first URL-encoded field.
Expand Down
1 change: 1 addition & 0 deletions src/workerd/jsg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ wd_cc_library(
visibility = ["//visibility:public"],
deps = [
":memory-tracker",
"//src/workerd/util:strings",
"@capnp-cpp//src/kj",
],
)
Expand Down
7 changes: 2 additions & 5 deletions src/workerd/jsg/url.c++
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "url.h"

#include <workerd/util/strings.h>

#include <kj/hash.h>

extern "C" {
Expand Down Expand Up @@ -488,11 +490,6 @@ inline bool isAsciiDigit(char c) {
return c >= '0' && c <= '9';
};

inline bool isHexDigit(char c) {
  // A hexadecimal digit is either a decimal digit or one of the letters a-f / A-F (ASCII).
  if (isAsciiDigit(c)) {
    return true;
  }
  const bool lowerHexLetter = (c >= 'a' && c <= 'f');
  const bool upperHexLetter = (c >= 'A' && c <= 'F');
  return lowerHexLetter || upperHexLetter;
}

inline bool isAscii(char codepoint) {
  // ASCII covers exactly the byte values 0x00 through 0x7f. Viewing the char as an unsigned
  // byte turns the two-sided range check into a single upper bound that gives the same answer
  // whether plain `char` is signed or unsigned.
  return static_cast<unsigned char>(codepoint) < 0x80;
}
Expand Down
17 changes: 17 additions & 0 deletions src/workerd/util/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,21 @@ inline kj::String toLowerCopy(kj::ArrayPtr<const char> ptr) {
return kj::mv(str);
}

// Lookup table indexed by byte value. The entry is 1 when the byte is the ASCII code of a
// hexadecimal digit (0-9, A-F, a-f); all other entries keep their value-initialized default.
// The table is computed entirely at compile time.
//
// Declared `inline` because this lives in a header: without it the `constexpr` variable has
// internal linkage, so every translation unit would carry its own copy of the table and the
// (implicitly inline) `isHexDigit()` would refer to a different object in each one.
inline constexpr kj::FixedArray<uint8_t, 256> kHexDigitTable = []() consteval {
  kj::FixedArray<uint8_t, 256> result{};
  for (uint8_t c: {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}) {
    result[c] = true;
  }
  for (uint8_t c: {'A', 'B', 'C', 'D', 'E', 'F'}) {
    result[c] = true;         // Uppercase
    result[c + 0x20] = true;  // Lowercase: ASCII lowercase letters sit 0x20 above uppercase.
  }
  return result;
}();

// Check if `c` is the ASCII code of a hexadecimal digit.
constexpr bool isHexDigit(char c) {
jasnell marked this conversation as resolved.
Show resolved Hide resolved
return kHexDigitTable[static_cast<int>(c)] == 1;
jasnell marked this conversation as resolved.
Show resolved Hide resolved
}

} // namespace workerd