forked from libuv/libuv
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
unix,win: support IDNA 2008 in uv_getaddrinfo()
Encode domain names before passing them on to the libc resolver. Some getaddrinfo() implementations support IDNA 2008, some only IDNA 2003 and some don't support i18n domain names at all.

This is a potential security issue because it means a domain name might resolve differently depending on the system that libuv is running on.

Fixes: libuv#2028
PR-URL: libuv#2046
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
- Loading branch information
1 parent
143da93
commit 6dd44ca
Showing
10 changed files
with
566 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl> | ||
* | ||
* Permission to use, copy, modify, and/or distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | ||
|
||
/* Derived from https://github.com/bnoordhuis/punycode | ||
* but updated to support IDNA 2008. | ||
*/ | ||
|
||
#include "uv.h" | ||
#include "idna.h" | ||
#include <string.h> | ||
|
||
/* Decode the remainder of a multi-byte UTF-8 sequence.
 *
 * The caller has already consumed the lead byte |a| (>= 0x80), so |*p| points
 * at the first continuation byte; |pe| points one past the end of the input.
 *
 * Returns the codepoint, or (unsigned) -1 when the sequence is malformed:
 * invalid lead byte, stray continuation byte, truncated input, bad
 * continuation byte, overlong encoding, surrogate, or value > U+10FFFF.
 *
 * |*p| is moved past the continuation bytes that belong to the sequence, on
 * success and on error alike, but it is never advanced beyond |pe|.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned b;
  unsigned c;
  unsigned d;
  unsigned min;
  int need;

  if (a > 0xF7)
    return -1;  /* 0xF8-0xFF never start a sequence. */

  /* Number of continuation bytes announced by the lead byte. */
  if (a > 0xEF)
    need = 3;
  else if (a > 0xDF)
    need = 2;
  else if (a > 0xBF)
    need = 1;
  else
    return -1;  /* Invalid continuation byte. */

  /* Truncated sequence. Skip whatever is left of it but never read or step
   * past |pe|. The comparison also covers the case where |*p| is already
   * beyond |pe| because the lead byte itself was the last readable byte.
   */
  if (pe - *p < need) {
    if (*p < pe)
      *p = pe;
    return -1;
  }

  /* Normalize every sequence to the four-byte layout a:b:c:d. Positions a
   * shorter sequence does not have are filled with synthetic continuation
   * bytes (0x80 plus payload bits taken from the lead byte) so that the
   * validation and assembly below can be shared by all lengths.
   */
  switch (need) {
  case 3:
    min = 0x10000;
    a = a & 7;
    b = (unsigned char) *(*p)++;
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    break;
  case 2:
    min = 0x800;
    b = 0x80 | (a & 15);
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  default:
    min = 0x80;
    b = 0x80;
    c = 0x80 | (a & 31);
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  }

  /* Each byte must individually look like 10xxxxxx. XOR-ing the bytes
   * together is not sufficient: two bad bytes can cancel each other out.
   */
  if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (d & 0xC0) != 0x80)
    return -1;  /* Invalid sequence. */

  b &= 63;
  c &= 63;
  d &= 63;
  a = (a << 18) | (b << 12) | (c << 6) | d;

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}
|
||
/* Decode one UTF-8 codepoint starting at |*p| and advance |*p| past it.
 * The first byte is read unconditionally. ASCII is returned directly;
 * anything else is handed to the multi-byte decoder together with |pe|.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  const char* cur;
  unsigned lead;

  cur = *p;
  lead = (unsigned char) *cur;
  *p = cur + 1;

  if (lead >= 128)
    return uv__utf8_decode1_slow(p, pe, lead);

  return lead;  /* ASCII, common case. */
}
|
||
/* Run the loop body once for every codepoint in [*p, pe), with the decoded
 * value in |c|. |p| is a `const char**` that is advanced as a side effect.
 * Decoding only starts while |*p < pe|, so the byte at |pe| is never used as
 * the start of a codepoint. The body is responsible for handling whatever
 * error value uv__utf8_decode1() returns.
 */
#define foreach_codepoint(c, p, pe)                                           \
  for (; *(p) < (pe) && ((c) = uv__utf8_decode1((p), (pe)), 1);)
|
||
static int uv__idna_toascii_label(const char* s, const char* se, | ||
char** d, char* de) { | ||
static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789"; | ||
const char* ss; | ||
unsigned c; | ||
unsigned h; | ||
unsigned k; | ||
unsigned n; | ||
unsigned m; | ||
unsigned q; | ||
unsigned t; | ||
unsigned x; | ||
unsigned y; | ||
unsigned bias; | ||
unsigned delta; | ||
unsigned todo; | ||
int first; | ||
|
||
h = 0; | ||
ss = s; | ||
todo = 0; | ||
|
||
foreach_codepoint(c, &s, se) { | ||
if (c < 128) | ||
h++; | ||
else if (c == (unsigned) -1) | ||
return UV_EINVAL; | ||
else | ||
todo++; | ||
} | ||
|
||
if (todo > 0) { | ||
if (*d < de) *(*d)++ = 'x'; | ||
if (*d < de) *(*d)++ = 'n'; | ||
if (*d < de) *(*d)++ = '-'; | ||
if (*d < de) *(*d)++ = '-'; | ||
} | ||
|
||
x = 0; | ||
s = ss; | ||
foreach_codepoint(c, &s, se) { | ||
if (c > 127) | ||
continue; | ||
|
||
if (*d < de) | ||
*(*d)++ = c; | ||
|
||
if (++x == h) | ||
break; /* Visited all ASCII characters. */ | ||
} | ||
|
||
if (todo == 0) | ||
return h; | ||
|
||
/* Only write separator when we've written ASCII characters first. */ | ||
if (h > 0) | ||
if (*d < de) | ||
*(*d)++ = '-'; | ||
|
||
n = 128; | ||
bias = 72; | ||
delta = 0; | ||
first = 1; | ||
|
||
while (todo > 0) { | ||
m = -1; | ||
s = ss; | ||
foreach_codepoint(c, &s, se) | ||
if (c >= n) | ||
if (c < m) | ||
m = c; | ||
|
||
x = m - n; | ||
y = h + 1; | ||
|
||
if (x > ~delta / y) | ||
return UV_E2BIG; /* Overflow. */ | ||
|
||
delta += x * y; | ||
n = m; | ||
|
||
s = ss; | ||
foreach_codepoint(c, &s, se) { | ||
if (c < n) | ||
if (++delta == 0) | ||
return UV_E2BIG; /* Overflow. */ | ||
|
||
if (c != n) | ||
continue; | ||
|
||
for (k = 36, q = delta; /* empty */; k += 36) { | ||
t = 1; | ||
|
||
if (k > bias) | ||
t = k - bias; | ||
|
||
if (t > 26) | ||
t = 26; | ||
|
||
if (q < t) | ||
break; | ||
|
||
/* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore | ||
* 10 <= y <= 35, we can optimize the long division | ||
* into a table-based reciprocal multiplication. | ||
*/ | ||
x = q - t; | ||
y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */ | ||
q = x / y; | ||
t = t + x % y; /* 1 <= t <= 35 because of y. */ | ||
|
||
if (*d < de) | ||
*(*d)++ = alphabet[t]; | ||
} | ||
|
||
if (*d < de) | ||
*(*d)++ = alphabet[q]; | ||
|
||
delta /= 2; | ||
|
||
if (first) { | ||
delta /= 350; | ||
first = 0; | ||
} | ||
|
||
/* No overflow check is needed because |delta| was just | ||
* divided by 2 and |delta+delta >= delta + delta/h|. | ||
*/ | ||
h++; | ||
delta += delta / h; | ||
|
||
for (bias = 0; delta > 35 * 26 / 2; bias += 36) | ||
delta /= 35; | ||
|
||
bias += 36 * delta / (delta + 38); | ||
delta = 0; | ||
todo--; | ||
} | ||
|
||
delta++; | ||
n++; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
#undef foreach_codepoint | ||
|
||
long uv__idna_toascii(const char* s, const char* se, char* d, char* de) { | ||
const char* si; | ||
const char* st; | ||
unsigned c; | ||
char* ds; | ||
int rc; | ||
|
||
ds = d; | ||
|
||
for (si = s; si < se; /* empty */) { | ||
st = si; | ||
c = uv__utf8_decode1(&si, se); | ||
|
||
if (c != '.') | ||
if (c != 0x3002) /* 。 */ | ||
if (c != 0xFF0E) /* . */ | ||
if (c != 0xFF61) /* 。 */ | ||
continue; | ||
|
||
rc = uv__idna_toascii_label(s, st, &d, de); | ||
|
||
if (rc < 0) | ||
return rc; | ||
|
||
if (d < de) | ||
*d++ = '.'; | ||
|
||
s = si; | ||
} | ||
|
||
if (s < se) { | ||
rc = uv__idna_toascii_label(s, se, &d, de); | ||
|
||
if (rc < 0) | ||
return rc; | ||
} | ||
|
||
if (d < de) | ||
*d++ = '\0'; | ||
|
||
return d - ds; /* Number of bytes written. */ | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl> | ||
* | ||
* Permission to use, copy, modify, and/or distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | ||
|
||
#ifndef UV_SRC_IDNA_H_ | ||
#define UV_SRC_IDNA_H_ | ||
|
||
/* Decode a single UTF-8 codepoint starting at |*p|. The byte at |*p| is | ||
 * read unconditionally, so the caller must make sure that |*p < pe|. | ||
 * Returns the codepoint or (unsigned) -1 on error, i.e., UINT32_MAX where | ||
 * unsigned is 32 bits wide. | ||
 * |p| is updated on success _and_ error, i.e., bad multi-byte sequences are | ||
 * skipped in their entirety, not just the first bad byte. | ||
 */ | ||
unsigned uv__utf8_decode1(const char** p, const char* pe); | ||
|
||
/* Convert a UTF-8 domain name to IDNA 2008 / Punycode. |se| and |de| point | ||
 * one past the end of the input and of the output buffer; nothing is | ||
 * written at or beyond |de|. The label separators '.', U+3002, U+FF0E and | ||
 * U+FF61 are all written out as '.'. A return value >= 0 | ||
 * is the number of bytes written to |d|, including the trailing nul byte. | ||
 * A return value < 0 is a libuv error code. |s| and |d| can not overlap. | ||
 */ | ||
long uv__idna_toascii(const char* s, const char* se, char* d, char* de); | ||
|
||
#endif /* UV_SRC_IDNA_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.