From e77b560ecae3ea6ab72b23682862616ee9ea5b28 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Tue, 28 Jul 2015 18:08:36 -0400
Subject: [PATCH 1/2] Fix incorrect handling of CESU-8 string in
 convert(UTF8String, Vector{UInt8})

---
 base/unicode/utf8.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/base/unicode/utf8.jl b/base/unicode/utf8.jl
index ec59ec5083ac0..2028f942285b0 100644
--- a/base/unicode/utf8.jl
+++ b/base/unicode/utf8.jl
@@ -267,9 +267,9 @@ function convert(::Type{UTF8String}, dat::Vector{UInt8})
                 buf[out += 1] = dat[pos += 1]
             else
                 # Pick up surrogate pairs (CESU-8 format)
-                ch = (((((ch & 0x3f) << 6) | (dat[pos + 1] & 0x3f)) << 10)
-                        + (((dat[pos + 3] & 0x3f) << 6) | (dat[pos + 4] & 0x3f))
-                        - 0xc00)
+                ch = ((((((ch & 0x3f) << 6) | (dat[pos + 1] & 0x3f)) << 10)
+                       + (((dat[pos + 3] & 0x3f)%UInt32 << 6) | (dat[pos + 4] & 0x3f)))
+                      - 0x01f0c00)
                 pos += 4
                 output_utf8_4byte!(buf, out, ch)
                 out += 4

From f931a0d552bd7d0263fed4c96a766fc78bd1b5c7 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Tue, 28 Jul 2015 22:06:19 -0400
Subject: [PATCH 2/2] Add tests for convert with CESU-8 input

---
 test/unicode.jl      |  1 +
 test/unicode/utf8.jl | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 test/unicode/utf8.jl

diff --git a/test/unicode.jl b/test/unicode.jl
index 1e3c384306cd0..862aa7cf2691d 100644
--- a/test/unicode.jl
+++ b/test/unicode.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
 include("unicode/checkstring.jl")
+include("unicode/utf8.jl")
 include("unicode/utf16.jl")
 include("unicode/utf32.jl")
 include("unicode/utf8proc.jl")
\ No newline at end of file
diff --git a/test/unicode/utf8.jl b/test/unicode/utf8.jl
new file mode 100644
index 0000000000000..af576c52733f9
--- /dev/null
+++ b/test/unicode/utf8.jl
@@ -0,0 +1,12 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+## Test for CESU-8 sequences
+
+let ch = 0x10000  # expected code point; U+10000 is the pair (0xd800, 0xdc00)
+    for hichar = 0xd800:0xdbff  # all high (lead) surrogates
+        for lochar = 0xdc00:0xdfff  # all low (trail) surrogates
+            @test convert(UTF8String, utf8(Char[hichar, lochar]).data) == string(Char(ch))
+            ch += 1  # low surrogate varies fastest, so each pair maps to the next code point
+        end
+    end
+end