Skip to content

Commit

Permalink
src: WHATWG URL C++ parser cleanup
Browse files Browse the repository at this point in the history
- Clarify port state
- Remove scheme flag
- Clarify URL_FLAG_TERMINATED

PR-URL: #11917
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
  • Loading branch information
TimothyGu committed Mar 30, 2017
1 parent 7139b93 commit 4ddd23f
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 30 deletions.
54 changes: 31 additions & 23 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,9 @@ namespace url {
if (flags->IsInt32())
base->flags = flags->Int32Value(context).FromJust();

GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME);
Local<Value> scheme = GET(env, base_obj, "scheme");
base->scheme = Utf8Value(env->isolate(), scheme).out();

GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
Expand Down Expand Up @@ -644,7 +646,7 @@ namespace url {
state = kNoScheme;
continue;
} else {
url->flags |= URL_FLAGS_TERMINATED;
url->flags |= URL_FLAGS_FAILED;
return;
}
break;
Expand All @@ -654,10 +656,12 @@ namespace url {
p++;
continue;
} else if (ch == ':' || (has_state_override && ch == kEOL)) {
buffer += ':';
if (buffer.size() > 0) {
url->flags |= URL_FLAGS_HAS_SCHEME;
buffer += ':';
url->scheme = buffer;
} else if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
Expand All @@ -672,7 +676,6 @@ namespace url {
state = kFile;
} else if (special &&
has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == base->scheme) {
state = kSpecialRelativeOrAuthority;
} else if (special) {
Expand All @@ -692,7 +695,7 @@ namespace url {
p = input;
continue;
} else {
url->flags |= URL_FLAGS_TERMINATED;
url->flags |= URL_FLAGS_FAILED;
return;
}
break;
Expand All @@ -702,7 +705,6 @@ namespace url {
url->flags |= URL_FLAGS_FAILED;
return;
} else if (cannot_be_base && ch == '#') {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
Expand All @@ -725,12 +727,10 @@ namespace url {
url->flags |= URL_FLAGS_CANNOT_BE_BASE;
state = kFragment;
} else if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme != "file:") {
state = kRelative;
continue;
} else {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = "file:";
url->flags |= URL_FLAGS_SPECIAL;
special = true;
Expand All @@ -756,7 +756,6 @@ namespace url {
}
break;
case kRelative:
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
Expand Down Expand Up @@ -951,7 +950,6 @@ namespace url {
buffer.clear();
state = kPort;
if (state_override == kHostname) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else if (ch == kEOL ||
Expand All @@ -972,7 +970,6 @@ namespace url {
buffer.clear();
state = kPathStart;
if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else {
Expand All @@ -996,13 +993,26 @@ namespace url {
int port = 0;
for (size_t i = 0; i < buffer.size(); i++)
port = port * 10 + buffer[i] - '0';
if (port >= 0 && port <= 0xffff) {
url->port = NormalizePort(url->scheme, port);
} else if (!has_state_override) {
url->flags |= URL_FLAGS_FAILED;
if (port < 0 || port > 0xffff) {
// TODO(TimothyGu): This hack is currently needed for the host
// setter since it needs access to hostname if it is valid, and
// if the FAILED flag is set the entire response to JS layer
// will be empty.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_FAILED;
return;
}
url->port = NormalizePort(url->scheme, port);
buffer.clear();
} else if (has_state_override) {
// TODO(TimothyGu): Similar case as above.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_TERMINATED;
return;
}
state = kPathStart;
continue;
Expand All @@ -1014,7 +1024,6 @@ namespace url {
case kFile:
base_is_file = (
has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:");
switch (ch) {
case kEOL:
Expand Down Expand Up @@ -1097,7 +1106,6 @@ namespace url {
state = kFileHost;
} else {
if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:" &&
base->flags & URL_FLAGS_HAS_PATH &&
base->path.size() > 0 &&
Expand Down Expand Up @@ -1158,8 +1166,7 @@ namespace url {
url->path.push_back("");
}
} else {
if (url->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == "file:" &&
if (url->scheme == "file:" &&
url->path.empty() &&
buffer.size() == 2 &&
WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
Expand Down Expand Up @@ -1233,8 +1240,7 @@ namespace url {
const struct url_data* url) {
Isolate* isolate = env->isolate();
argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
if (url->flags & URL_FLAGS_HAS_SCHEME)
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
if (url->flags & URL_FLAGS_HAS_USERNAME)
argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
if (url->flags & URL_FLAGS_HAS_PASSWORD)
Expand Down Expand Up @@ -1275,7 +1281,9 @@ namespace url {
HarvestBase(env, &base, base_obj.As<Object>());

URL::Parse(input, len, state_override, &url, &base, has_base);
if (url.flags & URL_FLAGS_INVALID_PARSE_STATE)
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
((state_override != kUnknownState) &&
(url.flags & URL_FLAGS_TERMINATED)))
return;

// Define the return value placeholders
Expand Down
13 changes: 6 additions & 7 deletions src/node_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input,
XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
XX(URL_FLAGS_TERMINATED, 0x08) \
XX(URL_FLAGS_SPECIAL, 0x10) \
XX(URL_FLAGS_HAS_SCHEME, 0x20) \
XX(URL_FLAGS_HAS_USERNAME, 0x40) \
XX(URL_FLAGS_HAS_PASSWORD, 0x80) \
XX(URL_FLAGS_HAS_HOST, 0x100) \
XX(URL_FLAGS_HAS_PATH, 0x200) \
XX(URL_FLAGS_HAS_QUERY, 0x400) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x800)
XX(URL_FLAGS_HAS_USERNAME, 0x20) \
XX(URL_FLAGS_HAS_PASSWORD, 0x40) \
XX(URL_FLAGS_HAS_HOST, 0x80) \
XX(URL_FLAGS_HAS_PATH, 0x100) \
XX(URL_FLAGS_HAS_QUERY, 0x200) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x400)

#define ARGS(XX) \
XX(ARG_FLAGS) \
Expand Down

0 comments on commit 4ddd23f

Please sign in to comment.