From 4ddd23f0ec5202b131b7070d543b20cd031f052a Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 15 Mar 2017 19:33:08 -0700 Subject: [PATCH] src: WHATWG URL C++ parser cleanup - Clarify port state - Remove scheme flag - Clarify URL_FLAG_TERMINATED PR-URL: https://github.com/nodejs/node/pull/11917 Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- src/node_url.cc | 54 ++++++++++++++++++++++++++++--------------------- src/node_url.h | 13 ++++++------ 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 4f3525332ebd94..54a2944588071c 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -494,7 +494,9 @@ namespace url { if (flags->IsInt32()) base->flags = flags->Int32Value(context).FromJust(); - GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); + Local scheme = GET(env, base_obj, "scheme"); + base->scheme = Utf8Value(env->isolate(), scheme).out(); + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); @@ -644,7 +646,7 @@ namespace url { state = kNoScheme; continue; } else { - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } break; @@ -654,10 +656,12 @@ namespace url { p++; continue; } else if (ch == ':' || (has_state_override && ch == kEOL)) { - buffer += ':'; if (buffer.size() > 0) { - url->flags |= URL_FLAGS_HAS_SCHEME; + buffer += ':'; url->scheme = buffer; + } else if (has_state_override) { + url->flags |= URL_FLAGS_TERMINATED; + return; } if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -672,7 +676,6 @@ namespace url { state = kFile; } else if (special && has_base && - base->flags & URL_FLAGS_HAS_SCHEME && url->scheme == base->scheme) { state = kSpecialRelativeOrAuthority; } else if (special) { @@ -692,7 +695,7 @@ namespace url { p = input; continue; } else { - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } break; @@ -702,7 +705,6 @@ namespace url { url->flags |= URL_FLAGS_FAILED; return; } else if (cannot_be_base && ch == '#') { - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -725,12 +727,10 @@ namespace url { url->flags |= URL_FLAGS_CANNOT_BE_BASE; state = kFragment; } else if (has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme != "file:") { state = kRelative; continue; } else { - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = "file:"; url->flags |= URL_FLAGS_SPECIAL; special = true; @@ -756,7 +756,6 @@ namespace url { } break; case kRelative: - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -951,7 +950,6 @@ namespace url { buffer.clear(); state = kPort; if (state_override == kHostname) { - url->flags |= URL_FLAGS_TERMINATED; return; } } else if (ch == kEOL || @@ -972,7 +970,6 @@ namespace url { buffer.clear(); state = kPathStart; if (has_state_override) { - url->flags |= URL_FLAGS_TERMINATED; return; } } else { @@ -996,13 +993,26 @@ namespace url { int port = 0; for (size_t i = 0; i < buffer.size(); i++) port = port * 10 + buffer[i] - '0'; - if (port >= 0 && port <= 0xffff) { - url->port = NormalizePort(url->scheme, port); - } else if (!has_state_override) { - url->flags |= URL_FLAGS_FAILED; + if (port < 0 || port > 0xffff) { + // TODO(TimothyGu): This hack is currently needed for the host + // setter since it needs access to hostname if it is valid, and + // if the FAILED flag is set the entire response to JS layer + // will be empty. + if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_FAILED; return; } + url->port = NormalizePort(url->scheme, port); buffer.clear(); + } else if (has_state_override) { + // TODO(TimothyGu): Similar case as above. + if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_TERMINATED; + return; } state = kPathStart; continue; @@ -1014,7 +1024,6 @@ namespace url { case kFile: base_is_file = ( has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme == "file:"); switch (ch) { case kEOL: @@ -1097,7 +1106,6 @@ namespace url { state = kFileHost; } else { if (has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme == "file:" && base->flags & URL_FLAGS_HAS_PATH && base->path.size() > 0 && @@ -1158,8 +1166,7 @@ namespace url { url->path.push_back(""); } } else { - if (url->flags & URL_FLAGS_HAS_SCHEME && - url->scheme == "file:" && + if (url->scheme == "file:" && url->path.empty() && buffer.size() == 2 && WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { @@ -1233,8 +1240,7 @@ namespace url { const struct url_data* url) { Isolate* isolate = env->isolate(); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); - if (url->flags & URL_FLAGS_HAS_SCHEME) - argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); if (url->flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); if (url->flags & URL_FLAGS_HAS_PASSWORD) @@ -1275,7 +1281,9 @@ namespace url { HarvestBase(env, &base, base_obj.As()); URL::Parse(input, len, state_override, &url, &base, has_base); - if (url.flags & URL_FLAGS_INVALID_PARSE_STATE) + if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || + ((state_override != kUnknownState) && + (url.flags & URL_FLAGS_TERMINATED))) return; // Define the return value placeholders diff --git a/src/node_url.h b/src/node_url.h index 809bf557be1e00..b17d81778fa0f0 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input, XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ XX(URL_FLAGS_TERMINATED, 0x08) \ XX(URL_FLAGS_SPECIAL, 0x10) \ - XX(URL_FLAGS_HAS_SCHEME, 0x20) \ - XX(URL_FLAGS_HAS_USERNAME, 0x40) \ - XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ - XX(URL_FLAGS_HAS_HOST, 0x100) \ - XX(URL_FLAGS_HAS_PATH, 0x200) \ - XX(URL_FLAGS_HAS_QUERY, 0x400) \ - XX(URL_FLAGS_HAS_FRAGMENT, 0x800) + XX(URL_FLAGS_HAS_USERNAME, 0x20) \ + XX(URL_FLAGS_HAS_PASSWORD, 0x40) \ + XX(URL_FLAGS_HAS_HOST, 0x80) \ + XX(URL_FLAGS_HAS_PATH, 0x100) \ + XX(URL_FLAGS_HAS_QUERY, 0x200) \ + XX(URL_FLAGS_HAS_FRAGMENT, 0x400) #define ARGS(XX) \ XX(ARG_FLAGS) \