diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 6d966bb117d382..2e7801fa896ef7 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -435,8 +435,7 @@ bool InitializeICUDirectory(const std::string& path) { int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, - size_t length, - bool lenient) { + size_t length) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_UNICODE; @@ -461,14 +460,7 @@ int32_t ToUnicode(MaybeStackBuffer* buf, &status); } - // UTS #46's ToUnicode operation applies no validation of domain name length - // (nor a flag requesting it to do so, like VerifyDnsLength for ToASCII). For - // that reason, unlike ToASCII below, ICU4C correctly accepts long domain - // names. However, ICU4C still sets the EMPTY_LABEL error in contrary to UTS - // #46. Therefore, explicitly filters out that error here. - info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; - - if (U_FAILURE(status) || (!lenient && info.errors != 0)) { + if (U_FAILURE(status)) { len = -1; buf->SetLength(0); } else { @@ -517,6 +509,21 @@ int32_t ToASCII(MaybeStackBuffer* buf, info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + // These error conditions are mandated unconditionally by UTS #46 version + // 9.0.0 (rev. 17), but were found to be incompatible with many actual domain + // names in the wild. As such, in the current UTS #46 draft (rev. 18) these + // checks are made optional depending on the CheckHyphens flag, which will be + // disabled in WHATWG URL's "domain to ASCII" algorithm as soon as the UTS #46 + // draft becomes standard. + // Refs: + // - https://github.com/whatwg/url/issues/53 + // - http://www.unicode.org/review/pri317/ + // - http://www.unicode.org/reports/tr46/tr46-18.html + // - https://www.icann.org/news/announcement-2000-01-07-en + info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; + info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; + info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; + if (U_FAILURE(status) || (!lenient && info.errors != 0)) { len = -1; buf->SetLength(0); @@ -533,11 +540,9 @@ static void ToUnicode(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); Utf8Value val(env->isolate(), args[0]); - // optional arg - bool lenient = args[1]->BooleanValue(env->context()).FromJust(); MaybeStackBuffer buf; - int32_t len = ToUnicode(&buf, *val, val.length(), lenient); + int32_t len = ToUnicode(&buf, *val, val.length()); if (len < 0) { return env->ThrowError("Cannot convert name to Unicode"); diff --git a/src/node_i18n.h b/src/node_i18n.h index 78e18fdb370c94..cc1f3e6ea53569 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -43,8 +43,7 @@ int32_t ToASCII(MaybeStackBuffer* buf, bool lenient = false); int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, - size_t length, - bool lenient = false); + size_t length); } // namespace i18n } // namespace node diff --git a/test/fixtures/url-idna.js b/test/fixtures/url-idna.js index f105adeed1243b..41f12edc6c2f7d 100644 --- a/test/fixtures/url-idna.js +++ b/test/fixtures/url-idna.js @@ -191,6 +191,27 @@ module.exports = { { ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`, unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com` + }, + // URLs with hyphen + { + ascii: 'r4---sn-a5mlrn7s.gevideo.com', + unicode: 'r4---sn-a5mlrn7s.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s.gevideo.com', + unicode: '-sn-a5mlrn7s.gevideo.com' + }, + { + ascii: 'sn-a5mlrn7s-.gevideo.com', + unicode: 'sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s-.gevideo.com', + unicode: '-sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn--a5mlrn7s-.gevideo.com', + unicode: '-sn--a5mlrn7s-.gevideo.com' } ], invalid: [ @@ -198,15 +219,6 @@ module.exports = { { url: '\ufffd.com', mode: 'ascii' - }, - { - url: '\ufffd.com', - mode: 'unicode' - }, - // invalid Punycode - { - url: 'xn---abc.com', - mode: 'unicode' } ] }