unix,win: support IDNA 2008 in uv_getaddrinfo()

Encode domain names before passing them on to the libc resolver. Some getaddrinfo() implementations support IDNA 2008, some only IDNA 2003 and some don't support i18n domain names at all. This is a potential security issue because it means a domain name might resolve differently depending on the system that libuv is running on. Fixes: libuv#2028 PR-URL: libuv#2046 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
bnoordhuis · Oct 30, 2018 · 6dd44ca · 6dd44ca
1 parent 143da93
commit 6dd44ca
Show file tree

Hide file tree

Showing 10 changed files with 566 additions and 2 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -13,6 +13,7 @@ endif()
 
 set(uv_sources
     src/fs-poll.c
+    src/idna.c
     src/inet.c
     src/threadpool.c
     src/timer.c
@@ -64,6 +65,7 @@ set(uv_test_sources
     test/test-homedir.c
     test/test-hrtime.c
     test/test-idle.c
+    test/test-idna.c
     test/test-ip4-addr.c
     test/test-ip6-addr.c
     test/test-ip6-addr.c

diff --git a/Makefile.am b/Makefile.am
@@ -29,6 +29,7 @@ libuv_la_CFLAGS = @CFLAGS@
 libuv_la_LDFLAGS = -no-undefined -version-info 1:0:0
 libuv_la_SOURCES = src/fs-poll.c \
                    src/heap-inl.h \
+                   src/idna.c \
                    src/inet.c \
                    src/queue.h \
                    src/threadpool.c \
@@ -189,6 +190,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \
                          test/test-homedir.c \
                          test/test-hrtime.c \
                          test/test-idle.c \
+                         test/test-idna.c \
                          test/test-ip4-addr.c \
                          test/test-ip6-addr.c \
                          test/test-ipc-heavy-traffic-deadlock-bug.c \

diff --git a/src/idna.c b/src/idna.c
@@ -0,0 +1,291 @@
+/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Derived from https://github.com/bnoordhuis/punycode
+ * but updated to support IDNA 2008.
+ */
+
+#include "uv.h"
+#include "idna.h"
+#include <string.h>
+
+static unsigned uv__utf8_decode1_slow(const char** p,
+                                      const char* pe,
+                                      unsigned a) {
+  unsigned b;
+  unsigned c;
+  unsigned d;
+  unsigned min;
+
+  if (a > 0xF7)
+    return -1;
+
+  switch (*p - pe) {
+  default:
+    if (a > 0xEF) {
+      min = 0x10000;
+      a = a & 7;
+      b = (unsigned char) *(*p)++;
+      c = (unsigned char) *(*p)++;
+      d = (unsigned char) *(*p)++;
+      break;
+    }
+    /* Fall through. */
+  case 2:
+    if (a > 0xDF) {
+      min = 0x800;
+      b = 0x80 | (a & 15);
+      c = (unsigned char) *(*p)++;
+      d = (unsigned char) *(*p)++;
+      a = 0;
+      break;
+    }
+    /* Fall through. */
+  case 1:
+    if (a > 0xBF) {
+      min = 0x80;
+      b = 0x80;
+      c = 0x80 | (a & 31);
+      d = (unsigned char) *(*p)++;
+      a = 0;
+      break;
+    }
+    return -1;  /* Invalid continuation byte. */
+  }
+
+  if (0x80 != (0xC0 & (b ^ c ^ d)))
+    return -1;  /* Invalid sequence. */
+
+  b &= 63;
+  c &= 63;
+  d &= 63;
+  a = (a << 18) | (b << 12) | (c << 6) | d;
+
+  if (a < min)
+    return -1;  /* Overlong sequence. */
+
+  if (a > 0x10FFFF)
+    return -1;  /* Four-byte sequence > U+10FFFF. */
+
+  if (a >= 0xD800 && a <= 0xDFFF)
+    return -1;  /* Surrogate pair. */
+
+  return a;
+}
+
+unsigned uv__utf8_decode1(const char** p, const char* pe) {
+  unsigned a;
+
+  a = (unsigned char) *(*p)++;
+
+  if (a < 128)
+    return a;  /* ASCII, common case. */
+
+  return uv__utf8_decode1_slow(p, pe, a);
+}
+
+#define foreach_codepoint(c, p, pe) \
+  for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
+
+static int uv__idna_toascii_label(const char* s, const char* se,
+                                  char** d, char* de) {
+  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
+  const char* ss;
+  unsigned c;
+  unsigned h;
+  unsigned k;
+  unsigned n;
+  unsigned m;
+  unsigned q;
+  unsigned t;
+  unsigned x;
+  unsigned y;
+  unsigned bias;
+  unsigned delta;
+  unsigned todo;
+  int first;
+
+  h = 0;
+  ss = s;
+  todo = 0;
+
+  foreach_codepoint(c, &s, se) {
+    if (c < 128)
+      h++;
+    else if (c == (unsigned) -1)
+      return UV_EINVAL;
+    else
+      todo++;
+  }
+
+  if (todo > 0) {
+    if (*d < de) *(*d)++ = 'x';
+    if (*d < de) *(*d)++ = 'n';
+    if (*d < de) *(*d)++ = '-';
+    if (*d < de) *(*d)++ = '-';
+  }
+
+  x = 0;
+  s = ss;
+  foreach_codepoint(c, &s, se) {
+    if (c > 127)
+      continue;
+
+    if (*d < de)
+      *(*d)++ = c;
+
+    if (++x == h)
+      break;  /* Visited all ASCII characters. */
+  }
+
+  if (todo == 0)
+    return h;
+
+  /* Only write separator when we've written ASCII characters first. */
+  if (h > 0)
+    if (*d < de)
+      *(*d)++ = '-';
+
+  n = 128;
+  bias = 72;
+  delta = 0;
+  first = 1;
+
+  while (todo > 0) {
+    m = -1;
+    s = ss;
+    foreach_codepoint(c, &s, se)
+      if (c >= n)
+        if (c < m)
+          m = c;
+
+    x = m - n;
+    y = h + 1;
+
+    if (x > ~delta / y)
+      return UV_E2BIG;  /* Overflow. */
+
+    delta += x * y;
+    n = m;
+
+    s = ss;
+    foreach_codepoint(c, &s, se) {
+      if (c < n)
+        if (++delta == 0)
+          return UV_E2BIG;  /* Overflow. */
+
+      if (c != n)
+        continue;
+
+      for (k = 36, q = delta; /* empty */; k += 36) {
+        t = 1;
+
+        if (k > bias)
+          t = k - bias;
+
+        if (t > 26)
+          t = 26;
+
+        if (q < t)
+          break;
+
+        /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
+         * 10 <= y <= 35, we can optimize the long division
+         * into a table-based reciprocal multiplication.
+         */
+        x = q - t;
+        y = 36 - t;  /* 10 <= y <= 35 since 1 <= t <= 26. */
+        q = x / y;
+        t = t + x % y;  /* 1 <= t <= 35 because of y. */
+
+        if (*d < de)
+          *(*d)++ = alphabet[t];
+      }
+
+      if (*d < de)
+        *(*d)++ = alphabet[q];
+
+      delta /= 2;
+
+      if (first) {
+        delta /= 350;
+        first = 0;
+      }
+
+      /* No overflow check is needed because |delta| was just
+       * divided by 2 and |delta+delta >= delta + delta/h|.
+       */
+      h++;
+      delta += delta / h;
+
+      for (bias = 0; delta > 35 * 26 / 2; bias += 36)
+        delta /= 35;
+
+      bias += 36 * delta / (delta + 38);
+      delta = 0;
+      todo--;
+    }
+
+    delta++;
+    n++;
+  }
+
+  return 0;
+}
+
+#undef foreach_codepoint
+
+long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
+  const char* si;
+  const char* st;
+  unsigned c;
+  char* ds;
+  int rc;
+
+  ds = d;
+
+  for (si = s; si < se; /* empty */) {
+    st = si;
+    c = uv__utf8_decode1(&si, se);
+
+    if (c != '.')
+      if (c != 0x3002)  /* 。 */
+        if (c != 0xFF0E)  /* ． */
+          if (c != 0xFF61)  /* ｡ */
+            continue;
+
+    rc = uv__idna_toascii_label(s, st, &d, de);
+
+    if (rc < 0)
+      return rc;
+
+    if (d < de)
+      *d++ = '.';
+
+    s = si;
+  }
+
+  if (s < se) {
+    rc = uv__idna_toascii_label(s, se, &d, de);
+
+    if (rc < 0)
+      return rc;
+  }
+
+  if (d < de)
+    *d++ = '\0';
+
+  return d - ds;  /* Number of bytes written. */
+}
diff --git a/src/idna.h b/src/idna.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef UV_SRC_IDNA_H_
+#define UV_SRC_IDNA_H_
+
+/* Decode a single codepoint. Returns the codepoint or UINT32_MAX on error.
+ * |p| is updated on success _and_ error, i.e., bad multi-byte sequences are
+ * skipped in their entirety, not just the first bad byte.
+ */
+unsigned uv__utf8_decode1(const char** p, const char* pe);
+
+/* Convert a UTF-8 domain name to IDNA 2008 / Punycode. A return value >= 0
+ * is the number of bytes written to |d|, including the trailing nul byte.
+ * A return value < 0 is a libuv error code. |s| and |d| can not overlap.
+ */
+long uv__idna_toascii(const char* s, const char* se, char* d, char* de);
+
+#endif  /* UV_SRC_IDNA_H_ */
diff --git a/src/unix/getaddrinfo.c b/src/unix/getaddrinfo.c
@@ -27,6 +27,7 @@
 
 #include "uv.h"
 #include "internal.h"
+#include "idna.h"
 
 #include <errno.h>
 #include <stddef.h> /* NULL */
@@ -141,15 +142,34 @@ int uv_getaddrinfo(uv_loop_t* loop,
                    const char* hostname,
                    const char* service,
                    const struct addrinfo* hints) {
+  char hostname_ascii[256];
   size_t hostname_len;
   size_t service_len;
   size_t hints_len;
   size_t len;
   char* buf;
+  long rc;
 
   if (req == NULL || (hostname == NULL && service == NULL))
     return UV_EINVAL;
 
+  /* FIXME(bnoordhuis) IDNA does not seem to work z/OS,
+   * probably because it uses EBCDIC rather than ASCII.
+   */
+#ifdef __MVS__
+  (void) &hostname_ascii;
+#else
+  if (hostname != NULL) {
+    rc = uv__idna_toascii(hostname,
+                          hostname + strlen(hostname),
+                          hostname_ascii,
+                          hostname_ascii + sizeof(hostname_ascii));
+    if (rc < 0)
+      return rc;
+    hostname = hostname_ascii;
+  }
+#endif
+
   hostname_len = hostname ? strlen(hostname) + 1 : 0;
   service_len = service ? strlen(service) + 1 : 0;
   hints_len = hints ? sizeof(*hints) : 0;