Skip to content

Commit

Permalink
unix,win: support IDNA 2008 in uv_getaddrinfo()
Browse files Browse the repository at this point in the history
Encode domain names before passing them on to the libc resolver.
Some getaddrinfo() implementations support IDNA 2008, some only
IDNA 2003 and some don't support i18n domain names at all.

This is a potential security issue because it means a domain name
might resolve differently depending on the system that libuv is
running on.

Fixes: libuv#2028
PR-URL: libuv#2046
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
  • Loading branch information
bnoordhuis committed Oct 30, 2018
1 parent 143da93 commit 6dd44ca
Show file tree
Hide file tree
Showing 10 changed files with 566 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ endif()

set(uv_sources
src/fs-poll.c
src/idna.c
src/inet.c
src/threadpool.c
src/timer.c
Expand Down Expand Up @@ -64,6 +65,7 @@ set(uv_test_sources
test/test-homedir.c
test/test-hrtime.c
test/test-idle.c
test/test-idna.c
test/test-ip4-addr.c
test/test-ip6-addr.c
test/test-ipc-heavy-traffic-deadlock-bug.c
Expand Down
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ libuv_la_CFLAGS = @CFLAGS@
libuv_la_LDFLAGS = -no-undefined -version-info 1:0:0
libuv_la_SOURCES = src/fs-poll.c \
src/heap-inl.h \
src/idna.c \
src/inet.c \
src/queue.h \
src/threadpool.c \
Expand Down Expand Up @@ -189,6 +190,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \
test/test-homedir.c \
test/test-hrtime.c \
test/test-idle.c \
test/test-idna.c \
test/test-ip4-addr.c \
test/test-ip6-addr.c \
test/test-ipc-heavy-traffic-deadlock-bug.c \
Expand Down
291 changes: 291 additions & 0 deletions src/idna.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

/* Derived from https://github.com/bnoordhuis/punycode
* but updated to support IDNA 2008.
*/

#include "uv.h"
#include "idna.h"
#include <string.h>

/* Decode the remainder of a multi-byte UTF-8 sequence.
 *
 * |a| is the lead byte (>= 0x80) that the caller has already consumed, so
 * |*p| points at the byte following it. |pe| is one past the last readable
 * input byte.
 *
 * Returns the decoded codepoint, or (unsigned) -1 when the lead byte is
 * invalid, the sequence is cut short by |pe|, a continuation byte is not of
 * the form 10xxxxxx, the encoding is overlong, the value exceeds U+10FFFF or
 * the value is a UTF-16 surrogate.
 *
 * |*p| is advanced over the continuation bytes that belong to the sequence
 * but is never moved beyond |pe|, and no byte at or after |pe| is read.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned b;
  unsigned c;
  unsigned d;
  unsigned min;
  unsigned need;

  if (a > 0xF7)
    return -1;  /* 0xF8..0xFF never start a sequence. */

  /* Number of continuation bytes announced by the lead byte. */
  if (a > 0xEF)
    need = 3;
  else if (a > 0xDF)
    need = 2;
  else if (a > 0xBF)
    need = 1;
  else
    return -1;  /* Invalid continuation byte. */

  if (*p >= pe)
    return -1;  /* The lead byte was the last byte of the input. */

  if ((unsigned long) (pe - *p) < need) {
    *p = pe;  /* Skip the incomplete tail without reading past |pe|. */
    return -1;
  }

  /* Normalize every form to "a, b, c, d" so that the shared code below can
   * treat it like a four-byte sequence; missing leading bytes are replaced
   * by synthetic continuation bytes carrying the lead byte's payload bits.
   */
  switch (need) {
  case 3:
    min = 0x10000;
    a = a & 7;
    b = (unsigned char) *(*p)++;
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    break;
  case 2:
    min = 0x800;
    b = 0x80 | (a & 15);
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  default:
    min = 0x80;
    b = 0x80;
    c = 0x80 | (a & 31);
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  }

  /* Each byte must individually look like 10xxxxxx. XOR-ing the three
   * bytes together is not sufficient: two equally malformed bytes cancel
   * each other out.
   */
  if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (d & 0xC0) != 0x80)
    return -1;  /* Invalid sequence. */

  b &= 63;
  c &= 63;
  d &= 63;
  a = (a << 18) | (b << 12) | (c << 6) | d;

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}

/* Decode the codepoint that begins at |*p| and step |*p| past its first
 * byte. Bytes below 128 are returned as-is; anything else is handed to
 * uv__utf8_decode1_slow() together with |pe|, which finishes the job.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  const char* cur;
  unsigned lead;

  cur = *p;
  lead = (unsigned char) *cur;
  *p = cur + 1;

  if (lead >= 128)
    return uv__utf8_decode1_slow(p, pe, lead);  /* Multi-byte sequence. */

  return lead;  /* ASCII, common case. */
}

/* Iterate over the UTF-8 codepoints stored in [*p, pe). Every pass decodes
 * one codepoint into |c| and advances |*p|. A decode is only started while
 * |*p| is strictly before |pe|, so the byte at |pe| itself is never read,
 * and every decoded value (including the (unsigned) -1 error marker) is
 * handed to the loop body.
 */
#define foreach_codepoint(c, p, pe)                                           \
  for (; *(p) < (pe) && (((c) = uv__utf8_decode1((p), (pe))), 1);)

/* Encode a single domain name label, the UTF-8 bytes in [s, se), and append
 * the result at |*d|, advancing |*d|. |de| is one past the end of the output
 * buffer; bytes that do not fit are silently dropped.
 *
 * A pure ASCII label is copied verbatim. A label with at least one
 * non-ASCII codepoint is written as "xn--", followed by its ASCII
 * characters, a '-' separator (only when there were ASCII characters) and
 * the Punycode-encoded deltas of the non-ASCII codepoints.
 *
 * Returns a value >= 0 on success (the number of ASCII codepoints for a
 * pure ASCII label, 0 otherwise), UV_EINVAL when the input contains an
 * invalid UTF-8 sequence or UV_E2BIG when the delta computation would
 * overflow.
 */
static int uv__idna_toascii_label(const char* s, const char* se,
                                  char** d, char* de) {
  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
  const char* ss;
  unsigned c;
  unsigned h;
  unsigned k;
  unsigned n;
  unsigned m;
  unsigned q;
  unsigned t;
  unsigned x;
  unsigned y;
  unsigned bias;
  unsigned delta;
  unsigned todo;
  int first;

  h = 0;
  ss = s;
  todo = 0;

  /* First pass: count ASCII (h) and non-ASCII (todo) codepoints and reject
   * malformed input.
   */
  foreach_codepoint(c, &s, se) {
    if (c < 128)
      h++;
    else if (c == (unsigned) -1)
      return UV_EINVAL;
    else
      todo++;
  }

  if (todo > 0) {
    if (*d < de) *(*d)++ = 'x';
    if (*d < de) *(*d)++ = 'n';
    if (*d < de) *(*d)++ = '-';
    if (*d < de) *(*d)++ = '-';
  }

  /* Second pass: copy the ASCII characters through unchanged. */
  x = 0;
  s = ss;
  foreach_codepoint(c, &s, se) {
    if (c > 127)
      continue;

    if (*d < de)
      *(*d)++ = c;

    if (++x == h)
      break;  /* Visited all ASCII characters. */
  }

  if (todo == 0)
    return h;

  /* Only write separator when we've written ASCII characters first. */
  if (h > 0)
    if (*d < de)
      *(*d)++ = '-';

  n = 128;
  bias = 72;
  delta = 0;
  first = 1;

  /* Emit the non-ASCII codepoints in increasing order of value. */
  while (todo > 0) {
    /* Find the smallest codepoint that is >= n. */
    m = -1;
    s = ss;
    foreach_codepoint(c, &s, se)
      if (c >= n)
        if (c < m)
          m = c;

    x = m - n;
    y = h + 1;

    if (x > ~delta / y)
      return UV_E2BIG;  /* Overflow. */

    delta += x * y;
    n = m;

    s = ss;
    foreach_codepoint(c, &s, se) {
      if (c < n)
        if (++delta == 0)
          return UV_E2BIG;  /* Overflow. */

      if (c != n)
        continue;

      /* Write |delta| as a variable-length base-36 integer. */
      for (k = 36, q = delta; /* empty */; k += 36) {
        t = 1;

        if (k > bias)
          t = k - bias;

        if (t > 26)
          t = 26;

        if (q < t)
          break;

        /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
         * 10 <= y <= 35, we can optimize the long division
         * into a table-based reciprocal multiplication.
         */
        x = q - t;
        y = 36 - t;  /* 10 <= y <= 35 since 1 <= t <= 26. */
        q = x / y;
        t = t + x % y;  /* 1 <= t <= 35 because of y. */

        if (*d < de)
          *(*d)++ = alphabet[t];
      }

      if (*d < de)
        *(*d)++ = alphabet[q];

      /* Bias adaptation; the first delta is damped much more strongly
       * (divided by 2 * 350) than later ones (divided by 2).
       */
      delta /= 2;

      if (first) {
        delta /= 350;
        first = 0;
      }

      /* No overflow check is needed because |delta| was just
       * divided by 2 and |delta+delta >= delta + delta/h|.
       */
      h++;
      delta += delta / h;

      for (bias = 0; delta > 35 * 26 / 2; bias += 36)
        delta /= 35;

      bias += 36 * delta / (delta + 38);
      delta = 0;
      todo--;
    }

    delta++;
    n++;
  }

  return 0;
}

#undef foreach_codepoint

long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
const char* si;
const char* st;
unsigned c;
char* ds;
int rc;

ds = d;

for (si = s; si < se; /* empty */) {
st = si;
c = uv__utf8_decode1(&si, se);

if (c != '.')
if (c != 0x3002) /* 。 */
if (c != 0xFF0E) /* . */
if (c != 0xFF61) /* 。 */
continue;

rc = uv__idna_toascii_label(s, st, &d, de);

if (rc < 0)
return rc;

if (d < de)
*d++ = '.';

s = si;
}

if (s < se) {
rc = uv__idna_toascii_label(s, se, &d, de);

if (rc < 0)
return rc;
}

if (d < de)
*d++ = '\0';

return d - ds; /* Number of bytes written. */
}
31 changes: 31 additions & 0 deletions src/idna.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifndef UV_SRC_IDNA_H_
#define UV_SRC_IDNA_H_

/* Decode a single UTF-8 codepoint that starts at |*p|; |pe| marks the end of
 * the input. Returns the codepoint or (unsigned) -1 on error.
 * |p| is updated on success _and_ error, i.e., bad multi-byte sequences are
 * skipped in their entirety, not just the first bad byte.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe);

/* Convert a UTF-8 domain name to IDNA 2008 / Punycode. The input is the byte
 * range from |s| up to but not including |se|; the output buffer is the
 * range from |d| up to but not including |de|. A return value >= 0
 * is the number of bytes written to |d|, including the trailing nul byte.
 * A return value < 0 is a libuv error code. |s| and |d| can not overlap.
 */
long uv__idna_toascii(const char* s, const char* se, char* d, char* de);

#endif /* UV_SRC_IDNA_H_ */
20 changes: 20 additions & 0 deletions src/unix/getaddrinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "uv.h"
#include "internal.h"
#include "idna.h"

#include <errno.h>
#include <stddef.h> /* NULL */
Expand Down Expand Up @@ -141,15 +142,34 @@ int uv_getaddrinfo(uv_loop_t* loop,
const char* hostname,
const char* service,
const struct addrinfo* hints) {
char hostname_ascii[256];
size_t hostname_len;
size_t service_len;
size_t hints_len;
size_t len;
char* buf;
long rc;

if (req == NULL || (hostname == NULL && service == NULL))
return UV_EINVAL;

/* FIXME(bnoordhuis) IDNA does not seem to work z/OS,
* probably because it uses EBCDIC rather than ASCII.
*/
#ifdef __MVS__
(void) &hostname_ascii;
#else
if (hostname != NULL) {
rc = uv__idna_toascii(hostname,
hostname + strlen(hostname),
hostname_ascii,
hostname_ascii + sizeof(hostname_ascii));
if (rc < 0)
return rc;
hostname = hostname_ascii;
}
#endif

hostname_len = hostname ? strlen(hostname) + 1 : 0;
service_len = service ? strlen(service) + 1 : 0;
hints_len = hints ? sizeof(*hints) : 0;
Expand Down
Loading

0 comments on commit 6dd44ca

Please sign in to comment.